{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np # linear algebra\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import base64\n",
    "import string\n",
    "import re\n",
    "from collections import Counter\n",
    "from nltk.corpus import stopwords\n",
    "stopwords = stopwords.words('english')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Conference</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Innovation in Database Management: Computer Sc...</td>\n",
       "      <td>VLDB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>High performance prime field multiplication fo...</td>\n",
       "      <td>ISCAS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>enchanted scissors: a scissor interface for su...</td>\n",
       "      <td>SIGGRAPH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Detection of channel degradation attack by Int...</td>\n",
       "      <td>INFOCOM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Pinning a Complex Network through the Betweenn...</td>\n",
       "      <td>ISCAS</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               Title Conference\n",
       "0  Innovation in Database Management: Computer Sc...       VLDB\n",
       "1  High performance prime field multiplication fo...      ISCAS\n",
       "2  enchanted scissors: a scissor interface for su...   SIGGRAPH\n",
       "3  Detection of channel degradation attack by Int...    INFOCOM\n",
       "4  Pinning a Complex Network through the Betweenn...      ISCAS"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('research_paper.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2507, 2)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Title         0\n",
       "Conference    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Conference'].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "train, test = train_test_split(df, test_size=0.33, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Research title sample: Cooperating with Smartness: Using Heterogeneous Smart Antennas in Ad-Hoc Networks.\n",
      "Conference of this paper: INFOCOM\n",
      "Training Data Shape: (1679, 2)\n",
      "Testing Data Shape: (828, 2)\n"
     ]
    }
   ],
   "source": [
    "print('Research title sample:', train['Title'].iloc[0])\n",
    "print('Conference of this paper:', train['Conference'].iloc[0])\n",
    "print('Training Data Shape:', train.shape)\n",
    "print('Testing Data Shape:', test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfgAAAD8CAYAAACbzrbdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAF/pJREFUeJzt3XuYJXV95/H3RxHvisCABDCDMN5Q\nuTgigqsrJBsvKOBKFI2CDzrxWSTeFiWrG01Wd72swWXNEhGM4CpCvCKyKMFBHw0IMwgDCMqAFyYg\nDHLxDqLf/aN+DYemZ+b0TNd0d8379Tz9nFO/+lWd76npqc/5VVXXSVUhSZKG5X6zXYAkSZp5Brwk\nSQNkwEuSNEAGvCRJA2TAS5I0QAa8JEkDZMBLkjRABrwkSQNkwEuSNECbzXYBG2LrrbeuhQsXznYZ\nkiRtFMuXL7+5qhaM03deB/zChQtZtmzZbJchSdJGkeTH4/b1EL0kSQNkwEuSNEAGvCRJA2TAS5I0\nQAa8JEkDZMBLkjRABrwkSQNkwEuSNEAGvCRJAzSv72Q3jqcdfcpslzBnLf/gq2e7BElSTxzBS5I0\nQAa8JEkDZMBLkjRABrwkSQNkwEuSNEAGvCRJA2TAS5I0QAa8JEkDZMBLkjRABrwkSQNkwEuSNEAG\nvCRJA2TAS5I0QAa8JEkDZMBLkjRABrwkSQNkwEuSNEAGvCRJA2TAS5I0QL0GfJIfJbksySVJlrW2\nLZOck+Tq9vio1p4kxyVZmWRFkj37rE2SpCHbGCP451bV7lW1uE0fA5xbVYuAc9s0wPOBRe1nCXD8\nRqhNkqRBmo1D9AcCJ7fnJwMHjbSfUp0LgC2SbDcL9UmSNO/1HfAFfC3J8iRLWtu2VXUDQHvcprVv\nD1w3suyq1iZJkqZps57Xv29VXZ9kG+CcJFetpW+maKv7dOo+KCwBeMxjHjMzVUqSNDC9juCr6vr2\neBPwBWAv4MaJQ+/t8abWfRWw48jiOwDXT7HOE6pqcVUtXrBgQZ/lS5I0b/UW8EkemuThE8+B/wBc\nDpwBHNa6HQZ8qT0/A3h1u5p+b+D2iUP5kiRpevo8RL8t8IUkE6/z6ao6O8lFwOlJjgB+AhzS+p8F\nvABYCfwaeE2PtUmSNGi9BXxVXQvsNkX7z4D9p2gv4Mi+6pEkaVPinewkSRogA16SpAEy4CVJGiAD\nXpKkATLgJUkaIANekqQBMuAlSRogA16SpAEy4CVJGiADXpKkATLgJUkaIANekqQBMuAlSRogA16S\npAEy4CVJGiADXpKkATLgJUkaIANekqQBMuAlSRogA16SpAEy4CVJGiADXpKkATLgJUkaIANekqQB\nMuAlSRogA16SpAEy4CVJGiADXpKkAeo94JPcP8l3k5zZpndK8p0kVyc5Lcnmrf2BbXplm7+w79ok\nSRqqjTGCfyNw5cj0+4Fjq2oRcCtwRGs/Ari1qnYBjm39JEnSeug14JPsALwQOLFNB9gP+GzrcjJw\nUHt+YJumzd+/9ZckSdPU9wj+w8DbgD+06a2A26rqrja9Cti+Pd8euA6gzb+99b+XJEuSLEuybPXq\n1X3WLknSvNVbwCc5ALipqpaPNk/RtcaYd09D1QlVtbiqFi9YsGAGKpUkaXg263Hd+wIvTvIC4EHA\nI+hG9Fsk2ayN0ncArm/9VwE7AquSbAY8Erilx/okSRqs3kbwVfXXVbVDVS0EXg58vapeCSwFXtq6\nHQZ8qT0/o03T5n+9qu4zgpckSes2G38H/3bgLUlW0p1jP6m1nwRs1drfAhwzC7VJkjQIfR6iv1tV\nnQec155fC+w1RZ/fAodsjHokSRo672QnSdIAGfCSJA2QAS9J0gAZ8JIkDZABL0nSABnwkiQN0FgB\nn+QhSf5rko+16UXtVrSSJGkOGncE/0/AHcAz2/Qq4D29VCRJkjbYuAG/c1V9APgdQFX9hqm/HEaS\nJM0B4wb8nUkeTPt2tyQ7043oJUnSHDTurWrfBZwN7JjkU3TfFHd4X0VpfvnJ3z1ltkuY0x7zN5fN\ndgmSNkFjBXxVnZPkYmBvukPzb6yqm3utTJIkrbdxr6I/GLirqr5SVWcCdyU5qN/SJEnS+hr3HPy7\nqur2iYmquo3usL0kSZqDxg34qfptlK+alSRJ0zduwC9L8vdJdk7y2CTHAsv7LEySJK2/cQP+KOBO\n4DTgn4HfAkf2VZQkSdow415F/yvgmJ5rkSRJM2SsgE/yOOA/AwtHl6mq/fopS5IkbYhxL5T7Z+Af\ngROB3/dXjiRJmgnjBvxdVXV8r5VIkqQZM+5Fdl9O8p+SbJdky4mfXiuTJEnrbdwR/GHt8eiRtgIe\nO7PlSJKkmTDuVfQ79V2IJEmaOePei/4hSd6Z5IQ2vSjJAf2WJkmS1te45+D/ie5GN/u06VXAe3qp\nSJIkbbBxA37nqvoA8DuAqvoN3dfGSpKkOWjcgL8zyYPpLqwjyc7AHWtbIMmDklyY5NIkVyT529a+\nU5LvJLk6yWlJNm/tD2zTK9v8hev9riRJ2sSN/XWxwNnAjkk+BZwLvG0dy9wB7FdVuwG7A89Lsjfw\nfuDYqloE3Aoc0fofAdxaVbsAx7Z+kiRpPawz4JMEuAp4CXA4cCqwuKrOW9ty1fllm3xA+ylgP+Cz\nrf1k4KD2/MA2TZu/f3ttSZI0TesM+Koq4ItV9bOq+kpVnVlVN4+z8iT3T3IJcBNwDnANcFtV3dW6\nrAK2b8+3B65rr3kXcDuw1bTejSRJAsY/RH9BkqdPd+VV9fuq2h3YAdgLeOJU3drjVKP1mtyQZEmS\nZUmWrV69erolSZK0SRg34J9LF/LXJFmR5LIkK8Z9kaq6DTgP2BvYIsnEDXZ2AK5vz1cBOwK0+Y8E\nbpliXSdU1eKqWrxgwYJxS5AkaZMy7q1qnz/dFSdZAPyuqm5rV+D/Cd2Fc0uBlwKfobsF7pfaIme0\n6fPb/K+30wOSJGmaxhrBV9WP6UbX+7Xnvx5j2e2ApW2kfxFwTlWdCbwdeEuSlXTn2E9q/U8Ctmrt\nbwGOme6bkSRJnbFG8EneBSwGHk93V7sHAP8X2HdNy1TVCmCPKdqvpTsfP7n9t8AhY1UtSZLWatxz\n8AcDLwZ+BVBV1wMP76soSZK0YcY9B39nVVWSiTvZPbTHmiRNYd//vcYDZpu8bx/17dkuQZpzxh3B\nn57ko3RXwL8O+BfgY/2VJUmSNsRaR/BJHlhVd1TV/0zyp8DP6c7D/01VnbNRKpQkSdO2rkP05wN7\nJvlkVb2K7m50kiRpjltXwG+e5DBgnyQvmTyzqj7fT1mSJGlDrCvgXw+8EtgCeNGkeQUY8JIkzUFr\nDfiq+hbwrSTLquqktfWVJElzx1h/JldVJyXZB1g4ukxVndJTXZIkaQOMeye7TwI7A5cAv2/NBRjw\nkiTNQePe6GYx8CS//EWSpPlh3BvdXA48us9CJEnSzBl3BL818L0kFwJ3TDRW1Yt7qUqSZsE3nv2c\n2S5hznrON78x2yVomsYN+Hf3WYQkSZpZ415F/40k2wJPb00XVtVN/ZUlSZI2xFjn4JP8OXAh3fe1\n/znwnSQv7bMwSZK0/sY9RP8O4OkTo/YkC+i+Ue6zfRUmSZLW37hX0d9v0iH5n01jWUmStJGNO4I/\nO8lXgVPb9MuAs/opSZIkbah1fR/8LsC2VXV0+za5ZwGh+xrZT22E+iRJ0npY12H2DwO/gO6rYavq\nLVX1ZrrR+4f7Lk6SJK2fdQX8wqpaMbmxqpbRffGMJEmag9YV8A9ay7wHz2QhkiRp5qwr4C9K8rrJ\njUmOAJb3U5IkSdpQ67qK/k3AF5K8knsCfTGwOXBwn4VJkobnI2/98myXMKe94UMvmrF1rTXgq+pG\nYJ8kzwWe3Jq/UlVfn7EKJEnSjBv3XvRLgaU91yJJkmaId6OTJGmAegv4JDsmWZrkyiRXJHlja98y\nyTlJrm6Pj2rtSXJckpVJViTZs6/aJEkauj5H8HcBb62qJwJ7A0cmeRJwDHBuVS0Czm3TAM8HFrWf\nJcDxPdYmSdKg9RbwVXVDVV3cnv8CuBLYHjgQOLl1Oxk4qD0/EDilOhcAWyTZrq/6JEkaso1yDj7J\nQmAP4Dt097a/AboPAcA2rdv2wHUji61qbZPXtSTJsiTLVq9e3WfZkiTNW70HfJKHAZ8D3lRVP19b\n1yna6j4NVSdU1eKqWrxgwYKZKlOSpEHpNeCTPIAu3D9VVZ9vzTdOHHpvjxPfM78K2HFk8R2A6/us\nT5KkoerzKvoAJwFXVtXfj8w6AzisPT8M+NJI+6vb1fR7A7dPHMqXJEnTM9aNbtbTvsCrgMuSXNLa\n/gvwPuD0dj/7nwCHtHlnAS8AVgK/Bl7TY22SJA1abwFfVd9i6vPqAPtP0b+AI/uqR5KkTYl3spMk\naYAMeEmSBsiAlyRpgAx4SZIGyICXJGmADHhJkgbIgJckaYAMeEmSBsiAlyRpgAx4SZIGyICXJGmA\nDHhJkgbIgJckaYAMeEmSBsiAlyRpgAx4SZIGyICXJGmADHhJkgbIgJckaYAMeEmSBsiAlyRpgAx4\nSZIGyICXJGmADHhJkgbIgJckaYAMeEmSBsiAlyRpgHoL+CQfT3JTkstH2rZMck6Sq9vjo1p7khyX\nZGWSFUn27KsuSZI2BX2O4D8BPG9S2zHAuVW1CDi3TQM8H1jUfpYAx/dYlyRJg9dbwFfVN4FbJjUf\nCJzcnp8MHDTSfkp1LgC2SLJdX7VJkjR0G/sc/LZVdQNAe9ymtW8PXDfSb1VrkyRJ62GuXGSXKdpq\nyo7JkiTLkixbvXp1z2VJkjQ/beyAv3Hi0Ht7vKm1rwJ2HOm3A3D9VCuoqhOqanFVLV6wYEGvxUqS\nNF9t7IA/AzisPT8M+NJI+6vb1fR7A7dPHMqXJEnTt1lfK05yKvDvga2TrALeBbwPOD3JEcBPgENa\n97OAFwArgV8Dr+mrLkmSNgW9BXxVHbqGWftP0beAI/uqRZKkTc1cuchOkiTNIANekqQBMuAlSRog\nA16SpAEy4CVJGiADXpKkATLgJUkaIANekqQBMuAlSRogA16SpAEy4CVJGiADXpKkATLgJUkaIANe\nkqQBMuAlSRogA16SpAEy4CVJGiADXpKkATLgJUkaIANekqQBMuAlSRogA16SpAEy4CVJGiADXpKk\nATLgJUkaIANekqQBMuAlSRogA16SpAGaUwGf5HlJvp9kZZJjZrseSZLmqzkT8EnuD/wD8HzgScCh\nSZ40u1VJkjQ/zZmAB/YCVlbVtVV1J/AZ4MBZrkmSpHlpLgX89sB1I9OrWpskSZqmVNVs1wBAkkOA\nP6uq17bpVwF7VdVRk/otAZa0yccD39+ohW64rYGbZ7uIgXMb989tvHG4nfs337bxH1fVgnE6btZ3\nJdOwCthxZHoH4PrJnarqBOCEjVXUTEuyrKoWz3YdQ+Y27p/beONwO/dvyNt4Lh2ivwhYlGSnJJsD\nLwfOmOWaJEmal+bMCL6q7kryBuCrwP2Bj1fVFbNcliRJ89KcCXiAqjoLOGu26+jZvD29MI+4jfvn\nNt443M79G+w2njMX2UmSpJkzl87BS5KkGWLATyHJL9vjwiSV5KiReR9Jcnh7/okkP0xySfv5q9b+\nyCSnJLmm/ZyS5JEj63hckrPaLXmvTHJ6km3bvGcluTDJVe1nychy72717DLS9ubWNrirQEf+He6X\n5Lgklye5LMlFSXZq8x6W5KNtO1+R5JtJnjGyjoPb9nnCSNsa17epSXJskjeNTH81yYkj0x9q2++g\nkbbvJ3nnyPTnkrwkyXeT7N7aNkvyqyR/MdJveZI9+39Xc0+S85L82aS2N7X9wOVT9J/Yt1ya5Adt\nH7L9yPwftd/dS9rjYG8KluQd7f/2ivZ+n9G25+I2/2FJjm/7gO+237PXjSy/KMmZbf7yJEuTPLvN\nOzzJ6rbeq5K8eYrXvzTJqZPaRvf9Fyd55kj7Syf1/WUf22UcBvy63QS8sV3ZP5Wjq2r39nNcazsJ\nuLaqdq6qnYEfAicCJHkQ8BXg+KrapaqeCBwPLEjyaODTwOur6gnAs4C/TPLCkde7jO4vDCa8FPje\nzLzVOetlwB8BT62qpwAHA7e1eScCtwCLqmpX4HC6v2udcCjwLe69zda2vk3NvwL7QPfBh27b7Toy\nfx/gmJE+WwG/BJ450ueZbT13rwvYje4eFRPLPRR4LHBpT+9jrjuVe/8O0qb/x1qWObqqdqO738d3\ngaWT9kPPrard6fYBx021gvmuBecBwJ5V9VTgT7j3DdGg2wfcSrcP2AN4HrBlW35if3tC2x8/DTiK\n7ndxwmltO+4LvCPJ3X+uneSJdDn57PY7POrottwxwEdn5A3PMAN+3VYD5wKHjdO5ja6fBvy3kea/\nAxYn2Rl4BXB+VX15YmZVLa2qy4EjgU9U1cWt/WbgbXS/QBO+SLuFb5LHAre3GodsO+CGqvoDQFWt\nqqpb2/Z8BvDOkXnXVtVXoPtkT/ef9gjuvXOdcn0b7+3MKd/mnlDeFbgc+EWSRyV5IPBE4PyRPvsA\nZ9J9IE078vGbqvrppHXtA/wjsHub3gu4uKp+3/cbmqM+CxzQtilJFtJ9yFy1rgWrcyzwU7rv6pjs\nEXQBN0TbATdX1R3Q7ROr6u77o7R9wF7cex+wuqre37q8km5/e/efXFfV5VX1ickvVFU/A1a215zw\nCuCTwNeAF6+hxm8Cu6xh3qwy4MfzPuCt6b4QZ7IP5p5D9E+h+6KcS0Z3ZO35JXQ70CcDy9fwOrtO\nMW8Z9x5R/Ry4LsmT6Uanp63PG5pnTgde1Lbxh5Ls0dp3ZdK2nuQg4Oyq+gFwy8jh4TWtb5PTdpZ3\nJXkMXSifD3yHblS+GFjRpp/cRo8Tfb5PF/770AU73HsEvw/dju+OJA+f1G+T08LjQrrRJXQfOE8D\npnOV88XAE0aml7bD+98A3jn1IvPe14Ad22mK/5PkOZPm7wpcOhHuU9iVbrutU/s/8CC63/kJL6P7\ndzqVbn87lRfRHVmdMJoJl4zz2n0x4MdQVT+k+8/5iilmjx6ivwwIU/+nXVP7OH0mt32GbgdxEPCF\ndaxz3quqVXSHKf8a+ANwbpL9x1j0ULptRXs8dAPXN1QTI++J8D5/ZPpf2+jpCmBPYG+6wL9XH4Cq\n+hGweTvV9AS6DwEX0R1lubvfJmz0MP3L2/R0ZNL0c6vqycBTgI+0I1aDUlW/pDsiuoTuSOVpaddA\nTaWdr78kyX3ugtrmfyHdtTefH2l+WZIrgGuB/1VVv219nw6srqof0x3F3TPJo0aW+2AL8CV0Rwkn\njGbC7swiA358/x14O+veZlcAe7TzmcDd5zZ3A65s85+2lmUnXyz3NO57jv3LwKuAn1TVz8eqfp6r\nqjuq6v9V1dF0/xYH0W2v3Ua39YR2rng/4MQkPwKOpvuPnLWsb1M1MfJ+Ct0h+gvoRvCTR+fPBh7e\nTmdcwD0BPzoyP5/unPAN1f0N7gV0p0n2as83ZV8E9m9Hkh48cSpuGvag24fcS1VdA9xId/RwcKrq\n91V1XlW9C3gD8B9HZn+PkX1AVb23heoj2vyJD6YT6zqY7jqdLUfWcVq7fuffAR9qH1ChGxA8oe0/\nrmnrHH3tiSD/03aKdc4x4MdUVVfR/TIdsI5+K+kuiBk9ZPZOuvOPK+kuottn9MK5JM9rh/f/ATg8\n91yJvBXwfuADk17jN3QfNt67oe9rPkiyZ5I/as/vBzwV+HHbsS0D/nYiuNsVswfShcwpVfXHVbWw\nqnaku9jxWWta38Z/Z3PGt+l+r29pO9NbgC3oQv78kT5/yT0Xya2gG80/hm4nOrquN48sdz7wauCn\nVbWpXsgI3D0aPQ/4ONMYvbdrHf6K7tzw2VPM3wbYiQH+Did5fJJFI027M/I+2z51GfCeiVOo7cK6\niaMdnwb2TTJ6/vwhU71WVZ1Pd779jW2/cAjdhbgLq2oh3bVPazpMPycZ8NPzXrovwVmXI4DHpfsz\nuGuAx7W2iXA+ADgqydVJvkf3ifKmqroB+AvgY0muohs1fXz0grwJVfWZ9RgBzFfbAF9u5xtXAHcB\nH2nzXgs8GliZ5DLgY3RfUnQo9z198Tm60yxrW9+m6DK6q+cvmNR2e7vQE7rfxcfSgruq7qL7C5Nl\nk85/fntSvxvobj29qR+en3Aq3dG8z4y0PT7JqpGfQ1r7B5NcCvwAeDrdIfk7R5Zb2g4RLwWOqaob\nN8Yb2MgeBpyc5HtJVtAdpXj3pD6vBbai2wcsB/6FbgA0ur99fZJrk5xPN+B6zxpe7/3Aa4AXAv9W\nVf82Mu+bwJOSbDflknOQd7KTJGmAHMFLkjRABrwkSQNkwEuSNEAGvCRJA2TAS5I0QAa8JEkDZMBL\nkjRABrwkSQP0/wEhod3mBFN4kQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a1c726940>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig = plt.figure(figsize=(8,4))\n",
    "sns.barplot(x = train['Conference'].unique(), y=train['Conference'].value_counts())\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spacy\n",
    "\n",
    "nlp = spacy.load('en_core_web_sm')\n",
    "punctuations = string.punctuation\n",
    "\n",
    "# Define function to cleanup text by removing personal pronouns, stopwords, and puncuation\n",
    "def cleanup_text(docs, logging=False):\n",
    "    texts = []\n",
    "    counter = 1\n",
    "    for doc in docs:\n",
    "        if counter % 1000 == 0 and logging:\n",
    "            print(\"Processed %d out of %d documents.\" % (counter, len(docs)))\n",
    "        counter += 1\n",
    "        doc = nlp(doc, disable=['parser', 'ner'])\n",
    "        tokens = [tok.lemma_.lower().strip() for tok in doc if tok.lemma_ != '-PRON-']\n",
    "        tokens = [tok for tok in tokens if tok not in stopwords and tok not in punctuations]\n",
    "        tokens = ' '.join(tokens)\n",
    "        texts.append(tokens)\n",
    "    return pd.Series(texts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "INFO_text = [text for text in train[train['Conference'] == 'INFOCOM']['Title']]\n",
    "\n",
    "IS_text = [text for text in train[train['Conference'] == 'ISCAS']['Title']]\n",
    "\n",
    "INFO_clean = cleanup_text(INFO_text)\n",
    "INFO_clean = ' '.join(INFO_clean).split()\n",
    "\n",
    "IS_clean = cleanup_text(IS_text)\n",
    "IS_clean = ' '.join(IS_clean).split()\n",
    "\n",
    "INFO_counts = Counter(INFO_clean)\n",
    "IS_counts = Counter(IS_clean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABBEAAAF1CAYAAAC+pnKAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xm8blVdP/DPV1BxRgQNUKHInMoh\nySkzUjOnHBKickBzrjSzMk1/hZqlZVppmVOBQ044kbOiOIuCooJgIKIgKKDgXIqu3x9rHe7D4Tzn\n7HvuOfccLu/363Vf93n2+F1rr72fvb977X2qtRYAAACAlVxuowMAAAAALh0kEQAAAIBJJBEAAACA\nSSQRAAAAgEkkEQAAAIBJJBEAAACASSQRANg0qmrfqmpVtfNGx7KUqjq9qu4yZ9y/V9X/W8N1tar6\n2bVa3qXNZaH8VXW/qjqjqr5bVbfc6Hjmqaq/qarzquprGx0LABtPEgG4VBoXcz+sqt0XDT9+XHzs\nu43LX/ECpqr2rKqXVdXZVfWdqjq5qp5WVVfZlnVvFqN8raquMzPsKXOGvXNjotw8WmuPbq09YzXz\nVtXRVfXwtY6JTe85Sf6otXbV1tqnNzqYpVTV9ZL8aZKbtNZ+aqPjWa3ZY3pVHTq+HzQzfufZ346q\nOmz8xnx35t/BM9M/pKo+V1Xfr6qvVdULq2rXRev8uap6/UjAfKuqPltVT6iqncb4K1bV31XVV6rq\nB1V1SlX9eVXVzDKOHnHdfNGy3zyGH7AO1QWwLEkE4NLsS0l+d+FLVf1CkittjxVX1W5JPjbWd7vW\n2tWS/HqSXZPstz1iWG+ttbOTnJrkjjOD75jk5CWGfXBrl79Zexvs6DZLvW+WONbSKsq0T5ITV7mu\nnVYz3yrsk+QbrbVztnbG6jbrueY3kzx9hXr8+5HgWfj32iSpqj9N8uwkf57kGklum15P76mqK4xp\n9ktyTJIzkvxCa+0aSQ5Ksn+Sq43lvz7JnZPcYwx7UJJHJvnnRXH8T5IHL3ypqmuNdZ67yrIDbJPN\nemAHmOIVmTmxSnJIkpfPTlBV16iql1fVuVX15ap66sJJbVX9bFV9YNwhOq+qFk4QFy6IP7P47tOM\nJyT5TpIHttZOT5LW2hmttT9urX12LOf2VfXJsfxPVtXtZ+I6enQR/uhYx39X1bWq6lVV9e0x/b4z\n07eq+oNxp+o7VfWMqtqvqj42pn/dwsnrmP4RVXVqVX2zqo6sqr0WLevRY1nnV9W/zt75WuSDGQmD\ncbJ9y/QT3NlhtxvTrVTfD6mqj1TV86rqm0kOraqdquo5o/5PS3LPRdvvIVV12ijzl6rqAUsFOe4a\n/s3M9wOq6syZ739RVV8dy/lCVd15DL9cVT2pqr5YVd8Y9bjbzHwPGuX4RlU9ZU4dXSKGhfVX1Z9W\n1TnVe6s8dM58z0zyK0leMNrCC2ZG32Xedqqq36+qk8a4d1XVPnOWv/CIyMOq6itJ3jeG33a0vwuq\n6jM1c0dzuXpfbr1V9c/Vu+d/u6qOq6pfmRl3aFUdUVWvrKpvJ3nI2P5/Oer/O2Oe600p/6IyLiz7\ntWM5n6qZO7cz2/g7VfX5qrrforJ+pKqeX31fPXmhfYzx16gtPY6+Wn2/3WnRvLNtesnjyqJ4r1hV\n302yU/px5otj+I2rHxsuqKoTq+reM/McVv1u99ur6ntJfm2J5e5WVf9ZVWeNOnvzzLitPiZUf3Tn\nPUn2Gm3zsDH9cm3n6Kp6ZlV9JMn3k/zMhDr8cPXjwPmjvd19YpnuVb332QUjnpst1T7meGeSHyZ5\n4FbMk6q6epKnJXlsa+2drbUfjd+A305PJCws72lJPtpae8JIyKa19oXW2u+11i4YbeyuSe7fWjuh\ntXZha+3jY/4/rIv3hHtVkoNrS8Ljd5O8acQPsP211vzzzz//LnX/kpye5C5JvpDkxukn42ekn8S1\nJPuO6V6e5C3pd3n2Tb+j87Ax7tVJnpKeUN0lyR1mlt+S/Owy6/94kqctM363JOen31naOf2k7/wk\n1xrjj06/y79f+p2sz4/Y7jKmf3mS/1wUz5FJrp7kpkn+L8lRSX5mZv5DxrR3SnJekl9McsUkz0/y\nwUXLemt6r4nrp9/NutucchyS5DPj8/7pyYIbLBr2gyRXmFDfD0lyYZLHjjJeKcmj03s2XG/U2ftH\nfDsnuUqSbye54Zh/zyQ3nRPnYUn+Zub7AUnOHJ9vONrGXuP7vkn2G58fP7bldUddvSjJq8e4myT5\nbnrC5IpJnjviv8tKMYz1X5jk6Ukun36n8ftJrjln3qOTPHzRsLnbKcl909vPjUddPTX9gmWpZe87\nlvXyUadXSrJ3km+MuC6X3ovmG0n2WK7eV1pv+gXQtca4P03ytSS7jHGHJvnRWMblRhx/nuRzYxtV\nkptnyz6yNe10YdkHjvr+s/SeSpcf4w9KstdY78FJvpdkz0Xt8k/GvAcn+VaS3cb4N492cZUk107y\niSSPWqZNzz2uLBH3RceZse5Tk/xlkiuk78ffmdkOh424fnlh2Uss721JXpvkmmN5v7qtx4TM7Evj\n+9y2M9OWv5J+nNp5xLFSHf4oySPSj+OPSXJWklqhTL+Y5JwktxnzHZL+u3DFCXV9aJJXJrl3ktPG\ncnfOxX87DsvMMWVmOXcb23znJcYdni3Hj68leegy2/5ZST4wZ9yXZ+rn6CQPT/LuJHcfwz6Rnrw9\nM8kB89bhn3/++bde//REAC7tFnoj/Hr6xehXF0aMuzYHJ3lya+07rd8t+sf0C/ukn7juk35x+b+t\ntQ9vxXqvleTsZcbfM8kprbVXtH6H6dUjvt+cmeY/W2tfbK19K8k7knyxtfbe1tqF6d1cF79o7dmt\ntW+31k5MckKSd7fWTpuZf2H6ByT5j9bap1pr/5fkyUluVxd/T8SzWmsXtNa+kn7hfos55fhAkp+v\nqmum3y3/UGvtlCS7zwz7eGvthxPqO0nOaq09f9TJD9Lv3v1T6704vpnk7xat/ydj/VdqrZ09yr61\nfpx+4XSTqrp8a+301toXx7hHJXlKa+3MUVeHJjmwerf0A5O8tbX2wTHu/414pvpRkqe3fqfy7ekJ\niRtuZezzttOjkvxda+2k0V7+Nsktak5vhOHQ1tr3Rr0/MMnbW2tvb639pLX2niTHpl8YJvPrfdn1\nttZe2Vr7xti+/5he77Nl/lhr7c1jnT9Ivzh6aut3aFtr7TOttW9MKP9SjmutHdFa+1F6wmeX9C7f\naa29vrV21ljva5OckuTWM/Oek94OfzTGfyHJPau/++PuSR4/6u6cJM9L8jsz8y5u06s9rtw2yVVH\nmX/YWntf+oX9785M85bW2kdGOf53duaq2nPE+ujW2vmjLB8Yo9fymLBS20mSw1prJ442sltWrsMv\nt9Ze0lr7cfqF+J5JrrNCmR6R5EWttWNaaz9urR2enly97bwKXqy1dmR6wmTe+0j+bPRyuKCqzhvD\ndk9y3ijbYmeP8cnKvxG7LzN+djkLXp7kwVV1wyS7ttY+tsyyAdaVJAJwafeKJL+Xfjfr5YvG7Z5+\nR+/LM8O+nH4nLUmemH738xOj6/Dvb8V6v5F+ojvPXovWu3jdSfL1mc8/WOL7VRfNP3X6i627tfbd\nEe/sumffsv79Jda1MO/p6Xe77pB+R/5DY9THZoYtPP6xUn0nvUfArL0WDZuN+3vpSYlHJzm7qt5W\nVTdaKs7ltNZOTe9xcGiSc6rqNTNdufdJ8qaFC4UkJ6UnHa6zOLYRz+wF7kq+sehCY249L2Pedton\nyT/PxP3N9La8d+abred9khw0c4F0Qfr23HOFel92vdUf3zhpdOW/IL2XzOzF0OLtf70kX8x8k9rp\n4mW31n6S3m73GnE9eKbb+wVJfn5RXF9trbWZ718e8+6Tfpf67Jl5X5R+N31emVZ7XNkryRkj9tk4\nltt/Zl0vyTdba+fPWfaaHBOyTNuZE+eUOrxo3a2174+PV12hTPsk+dNFcVxvlHVrPDW958guS4x7\nTmtt1/Fvob2cl55EXer9F3uO8cnKvxHnLTN+djkL3pjeo+Sx6b97ABtGEgG4VGutfTm92/I90k+y\nZp2XLXcFF1w/o7dCa+1rrbVHtNb2Sr/D+m81/U/KvTfJ/Wr+S8POWrTei617nV1s3dX/WsS1tmHd\nH0pPFtwuyUcXDbtDtiQRlq3vYfZCLel33Gafgb/+7MjW2rtaa7+eflJ9cpKXzInxe0muPPP9Ym+R\nb639V2vtDtnyuMuzx6gz0rsI7zrzb5fW2lcXx1ZVV06vx/WwuF5WckZ6d+fZuK/UWvvoMvPMruOM\nJK9YNP9VWmvPSpat97nrrf7+g79I711yzdbarund72ffY7C4nGdk7V5EOrutLpf+iMpZo5fES5L8\nUfqjErum9+SZjWvvqou9b+H66fvRGel3t3efKe/VW2s3nVembTiunJXkeouOKSvtP7POSLJbLfoL\nATPLXqtjwrJtZ4k4p9ThcuuaV6YzkjxzURxXbr3X12SjJ8WpSf5g4iwfSy/Pb80OHHV69/THzJL+\nG3H/ZZbz3iS3qYu/AyRVdev0tvy+RXF+P73H2WMiiQBsMEkEYEfwsCR3GndQLzK6xr4uyTOr6mrj\nYuIJ6c/CpqoOqqrrjsnPTz/x/fH4/vX09w3M89z09xMcvtCVu6r2rqrnjpd7vT3Jz1XV71X/02EH\npz9j/9Y1KO9K/ivJQ6vqFlV1xfQu58eMXgWr8cH0R0bOaq19ewz78Bh2jfST6hXre47XJXlcVV13\nPB7xpIURVXWdqrr3ODn/v/THAX48ZznHJ7nHeAnbT6X3PFhYzg2r6k6jLv43vdfGwnL+fcS7sA33\nqKr7jHFHJLlXVd2h+ksrn571+91cqb0t9u9JnlxVN00uevnfQSvMM+uVSX6zqn6j+ssNd6n+Msjr\nrlDvy633aunPip+bZOeq+qv0fWQ5L03yjKq6QXU3q/7m+dW4VVX91rhD/PgR+8fTn8NvI65Uf8Hl\nzy+a99rp7fDyozw3Tu+yf3b6s+j/WFVXr/4izv2q6lfnBbHCcWU5x6Qnw5444jgg/fGn10wp/Ij1\nHelJi2uOZSz8FZW1PCbMbTvLxLVVdTixTC9J8uiqus1oO1epqntW1dXmL3Gup6T3IFlR64+PPS3J\n86vqbiOmfdMfQTszWy7w/zrJ7avqH8YxaeFlvq+sql1ba+9NTzi8oapuOurytukvUXxh64+NLfaX\n6e+EOH0VZQRYM5IIwKVe6+8VOHbO6Memn5ifln7h+19J/mOM+6Ukx1R/S/qRSf64tfalMe7Q9ATB\nBVX120us85tJbp9+5/2YqvpO+gnht5Kc2vpz3fdKf7ncN9JPUO/VWlvcRXXNtdaOSn9+/w3pd9P3\ny8WfP95aH0i/yJp9tvv49JfIHTfT/ThZvr6X8pIk70rymSSfysV7k1wuvf7OSu82/6uZf7fwFWMZ\np6dfsMy+Ef+K6S8xOy+92/S100/Gk/6XJo5M8u6xDT+e/qK2tP4egD8cZTg7/YLwzKyPf05/F8P5\nVfUvK03cWntTem+K11T/SwcnpN8FnaS1dkaS+6TXw7npd3X/PL3O59b7Cut9V/oF3/+kd53/3yzf\n/T7pybjXpW+zbyd5WVb/Z1rfkv4YxsILTX9rPEP/+fR3c3wsPVnzC0k+smjeY9JfGHpekmcmObBt\neTfDg9Mf0/n8WPYRWb6b+nLHlblaaz9Mf9Hf3Ucc/5bkwa21k1ead8aD0o9JJ6e/5+HxY9lrdkxY\noe3Ms7V1OGtemY5Nfy/CC8YyT01/rG2rtdY+kv6ywqnT/316+Z+T3m4X/pTjnVt/50Raf+/K7dJf\nbHpiVX0rvf6PTX9hZtJ7Krw//S9FfDc9QfOy9OPoUus9q23du3sA1sXCm28BAC6VqurQ9Dfvb9Wf\n6xvzPiT9L2PcYa3jAoAdkZ4IAAAAwCSSCAAAAMAkHmcAAAAAJtETAQAAAJhEEgEAAACYZOftubLd\nd9+97bvvvttzlQAAAMAKjjvuuPNaa3usNN12TSLsu+++OfbYeX/KHQAAANgIVfXlKdN5nAEAAACY\nRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhE\nEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYZOeNDuDcF75yo0NY1h6PeeBGhwAAAACbgp4IAAAA\nwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADA\nJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAk\nkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSS\nCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJDtvdAA7iq+98G82OoRl/dRjnrrRIQAA\nAHAppycCAAAAMIkkAgAAADCJJAIAAAAwiSQCAAAAMIkkAgAAADDJpCRCVf1JVZ1YVSdU1aurapeq\n+umqOqaqTqmq11bVFdY7WAAAAGDjrJhEqKq9kzwuyf6ttZ9PslOS30ny7CTPa63dIMn5SR62noEC\nAAAAG2vq4ww7J7lSVe2c5MpJzk5ypyRHjPGHJ7nv2ocHAAAAbBYrJhFaa19N8pwkX0lPHnwryXFJ\nLmitXTgmOzPJ3usVJAAAALDxpjzOcM0k90ny00n2SnKVJHdfYtI2Z/5HVtWxVXXsueeeuy2xAgAA\nABtoyuMMd0nypdbaua21HyV5Y5LbJ9l1PN6QJNdNctZSM7fWXtxa27+1tv8ee+yxJkEDAAAA29+U\nJMJXkty2qq5cVZXkzkk+n+T9SQ4c0xyS5C3rEyIAAACwGUx5J8Ix6S9Q/FSSz415XpzkL5I8oapO\nTXKtJC9bxzgBAACADbbzypMkrbW/TvLXiwafluTWax4RAAAAsClN/ROPAAAAwGWcJAIAAAAwiSQC\nAAAAMIkkAgAAADCJJAIAAAAwiSQCAAAAMIkkAgAAADCJJAIAAAAwiSQCAAAAMIkkAgAAADCJJAIA\nAAAwiSQCAAAAMIkkAgAAADCJJAIAAAAwiSQCAAAAMIkkAgAAADCJJAIAAAAwiSQCAAAAMIkkAgAA\nADCJJAIAAAAwiSQCAAAAMIkkAgAAADCJJAIAAAAwiSQCAAAAMIkkAgAAADCJJAIAAAAwiSQCAAAA\nMIkkAgAAADCJJAIAAAAwiSQCAAAAMIkkAgAAADCJJAIAAAAwyc4bHQCby8n/ep+NDmFZN/rDt2x0\nCAAAAJdZeiIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0gi\nAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIA\nAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAA\nAACTSCIAAAAAk0giAAAAAJNMSiJU1a5VdURVnVxVJ1XV7apqt6p6T1WdMv6/5noHCwAAAGycqT0R\n/jnJO1trN0py8yQnJXlSkqNaazdIctT4DgAAAOygVkwiVNXVk9wxycuSpLX2w9baBUnuk+TwMdnh\nSe67XkECAAAAG29KT4SfSXJukv+sqk9X1Uur6ipJrtNaOztJxv/XXmrmqnpkVR1bVceee+65axY4\nAAAAsH1NSSLsnOQXk7ywtXbLJN/LVjy60Fp7cWtt/9ba/nvssccqwwQAAAA22pQkwplJzmytHTO+\nH5GeVPh6Ve2ZJOP/c9YnRAAAAGAzWDGJ0Fr7WpIzquqGY9Cdk3w+yZFJDhnDDknylnWJEAAAANgU\ndp443WOTvKqqrpDktCQPTU9AvK6qHpbkK0kOWp8QAQAAgM1gUhKhtXZ8kv2XGHXntQ0HAAAA2Kym\nvBMBAAAAQBIBAAAAmEYSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQS\nAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIB\nAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEA\nAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAA\nAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAA\nmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACY\nRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhE\nEgEAAACYZHISoap2qqpPV9Vbx/efrqpjquqUqnptVV1h/cIEAAAANtrW9ET44yQnzXx/dpLntdZu\nkOT8JA9by8AAAACAzWVSEqGqrpvknkleOr5XkjslOWJMcniS+65HgAAAAMDmMLUnwj8leWKSn4zv\n10pyQWvtwvH9zCR7r3FsAAAAwCayYhKhqu6V5JzW2nGzg5eYtM2Z/5FVdWxVHXvuueeuMkwAAABg\no03pifDLSe5dVacneU36Ywz/lGTXqtp5THPdJGctNXNr7cWttf1ba/vvscceaxAyAAAAsBFWTCK0\n1p7cWrtua23fJL+T5H2ttQckeX+SA8dkhyR5y7pFCQAAAGy4rfnrDIv9RZInVNWp6e9IeNnahAQA\nAABsRjuvPMkWrbWjkxw9Pp+W5NZrHxIAAACwGW1LTwQAAADgMkQSAQAAAJhEEgEAAACYRBIBAAAA\nmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACY\nRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhE\nEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmGTnjQ4A1svRL7nnRoewrAMe8baNDgEA\nAGCr6IkAAAAATCKJAAAAAEwiiQAAAABMIokAAAAATCKJAAAAAEwiiQAAAABM4k88wiZ3xH/ebaND\nWNaBD33nRocAAABsJ3oiAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0gi\nAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIA\nAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJNIIgAA\nAACT7LzRAQCXDS96xW9sdAjLetSD3jVpukNft7nLcehvTysHAACshp4IAAAAwCSSCAAAAMAkKyYR\nqup6VfX+qjqpqk6sqj8ew3erqvdU1Snj/2uuf7gAAADARpnSE+HCJH/aWrtxktsm+cOqukmSJyU5\nqrV2gyRHje8AAADADmrFJEJr7ezW2qfG5+8kOSnJ3knuk+TwMdnhSe67XkECAAAAG2+r/jpDVe2b\n5JZJjklyndba2UlPNFTVtefM88gkj0yS61//+tsSKwBr5O5vuf9Gh7Cid9znDRsdAgAAi0x+sWJV\nXTXJG5I8vrX27anztdZe3Frbv7W2/x577LGaGAEAAIBNYFISoaoun55AeFVr7Y1j8Neras8xfs8k\n56xPiAAAAMBmMOWvM1SSlyU5qbX23JlRRyY5ZHw+JMlb1j48AAAAYLOY8k6EX07yoCSfq6rjx7C/\nTPKsJK+rqocl+UqSg9YnRACY7x5v+puNDmFZb7/fUydNd883vnCdI9k2b/utx2x0CADAJrBiEqG1\n9uEkNWf0ndc2HAAAAGCzmvxiRQAAAOCybav+xCMAwHLudcSrNjqEZb31wAdMmu7eR/z3OkeybY48\n8Dc3OgQALqP0RAAAAAAmkUQAAAAAJvE4AwDADup+b/jwRoewojfd/w6Tpjv4jaeucyTb5rW/9bOT\npvvXN319nSPZNn94v+tsdAjAJqcnAgAAADCJJAIAAAAwiSQCAAAAMIl3IgAAAFvlHa89b6NDWNbd\nD959o0OAHZaeCAAAAMAkkggAAADAJB5nAAAALpM+/dJzNjqEFd3y4deeNN3Zf//VdY5k2+z5xL0n\nTff1fzpunSPZNtd5/K0mTXfOC969zpFsm2v/0V1XPa+eCAAAAMAkkggAAADAJJIIAAAAwCSSCAAA\nAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAA\nwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADA\nJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAk\nkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSS\nCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJII\nAAAAwCSSCAAAAMAk25REqKq7VdUXqurUqnrSWgUFAAAAbD6rTiJU1U5J/jXJ3ZPcJMnvVtVN1iow\nAAAAYHPZlp4It05yamvttNbaD5O8Jsl91iYsAAAAYLPZliTC3knOmPl+5hgGAAAA7ICqtba6GasO\nSvIbrbWHj+8PSnLr1tpjF033yCSPHF9vmOQLqw93kt2TnLfO69gelGPz2VHKohybi3JsPjtKWZRj\nc9lRypHsOGVRjs1FOTafHaX9EFpfAAATRElEQVQsyjHdPq21PVaaaOdtWMGZSa438/26Sc5aPFFr\n7cVJXrwN69kqVXVsa23/7bW+9aIcm8+OUhbl2FyUY/PZUcqiHJvLjlKOZMcpi3JsLsqx+ewoZVGO\ntbctjzN8MskNquqnq+oKSX4nyZFrExYAAACw2ay6J0Jr7cKq+qMk70qyU5L/aK2duGaRAQAAAJvK\ntjzOkNba25O8fY1iWSvb7dGJdaYcm8+OUhbl2FyUY/PZUcqiHJvLjlKOZMcpi3JsLsqx+ewoZVGO\nNbbqFysCAAAAly3b8k4EAAAA4DLkUpNEqKr7VtVN1mG5h1bVn631cieu+1JXpqp6e1XtupXzHF1V\nm+JNojuqqtq3qk7Y6Di2p1Hm31vlfJu2rqrqu+P/i5Wvqvavqn/ZuMimqaoDquqtWzH9LarqHusZ\n01pZr2P2Wquqh1TVC1Yx372r6knrEdNaWm351ttmP7YspaoOqqqTqur94/urq+qzVfUnVfX0qrrL\nMvNu0zGpqv5ytfOuhXnnSlW1V1UdMT5v1fFsvVXV46vqyjPft/qcbM5yN1U5Z1XV6VW1+7ZOc1m0\nrcfKqe1i5rzlon1nM1n8273SsW07xXRYVR24Qetek9+qS00SIcl9k6zpyVtVbdM7IdbApa5MrbV7\ntNYuWLTOqqpLU1va7tTRutg3yZJJhE2wb6+FfTNTvtbasa21x21cOOvmFkm2exJhlfvkmh+zN5PW\n2pGttWdtdBxsVw9L8gettV+rqp9KcvvW2s1aa89rrf1Va+2982Zcg2PShiYR5mmtndVa25CT+wke\nn+SiJMJS52SwUbbHvrMWv90rHduYZsMuakYW5KSqeklVnVhV766qK1XVflX1zqo6rqo+VFU3qqrb\nJ7l3kn+oquOr6jZVddxYzs2rqlXV9cf3L1bVlatqn6o6amTUj5oZf1hVPXdk3Z+9KKZHVNU7RhyP\nq6rPj/lfsyOUaWIZnlhVjxufn1dV7xuf71xVr1zI9s6U9d+SfCrJ9arqrlX1sar6VFW9vqquusTy\nl5ymqp41U9/PGcMOqqoTquozVfXBKfGvULarVNXbxvJOqKqDq+pWVfWBsW3eVVV7jmmPrqpnV9Un\nqup/qupXxvCbjmHHj1hvMIY/YSzzhKp6/Bh2iTra1jKsYOeqOnzEdcRoM39VVZ8ccb24qmrEdon2\nPernP8b0n66q+6xnsFX14LH+z1TVK1Zo3/9SVR+tqtNqS+b2WUl+ZWyLP6mecX99Vf13kndX9w+j\n7J+rqoPXszwz5dq3qk6uqpeOdb+qqu5SVR+pqlOq6ta16A7YmG7fRYtaXL51v1M0E/vUdvSzVfXe\nsQ0/VVX7LVreL4229DNLta/qfx746UkOHuVc1220xD75oNE2TqiqZ89M992ZzweONrj4mL3fJdew\nJjG+eRyPTqyqRy7EU1XPHPX88aq6zhj+m1V1zKjP9y4Mn1nW1arqS1V1+fH96tWP4Zefcwy46K5V\nrfHxdw3q4KHVj8UfSPLL2zOerbRTXfIc4BZju322qt5UVddMVt5/1lpVPbC2/H69qKr+Oskdkvx7\nVf1DkncnufYY/ys1c6ds7MsfHbF+YrSti45JS+3fY/hDquqN1c+BTqmqvx/Dn5XkSmNdr9qGMk05\n3u422tRnx3a42cwibl5V7xvTPmJmmZe4SzevjNuqFp0/1Pzj8OOS7JXk/bWl58jsOdmy9TCmv/XY\njp8e/99wLcqwRJlWvV2q6lpj3/l0Vb0oSc0sd3Eb3mk94p9YvhNmvv9Z9d/17XJuVUufz15iHx2T\n77V4/xvLmHc+frex7T6c5Ldmpl/xvGW2Xubt+2Pcw6ofz4+ufrxctrdErfFvd1382HZ6VT1t1MPn\nqupGY/geVfWeMfxFVfXl2obeLrXonHcMvmMtOretqqtWPwdeiGfhWLrkNeYYN++aZafq58GfHOt+\n1GrjX1JrbUP+pd9luzDJLcb31yV5YJKjktxgDLtNkveNz4clOXBm/hOTXD3JHyX5ZJIHJNknycfG\n+P9Ocsj4/PtJ3jyznLcm2Wl8PzTJn43lHJnkimP4WTOfd90RyjSxDLdN8vrx+UNJPpHk8kn+Osmj\nkpyeZPdR1p8kue2YdvckH0xylfH9L5L81fh8dJL9502TZLckX0guetHnruP/zyXZe2u2wQplu3+S\nl8x8v0aSjybZY3w/OP1PlS7E/I/j8z2SvHd8fn6SB4zPV0hypSS3GrFeJclVx3a85eI62g77U0vy\ny+P7f4w2sNvMNK9I8pvz2neSv03ywIVhSf5nYVutQ7w3Hdt89/F9txXa9+vTk543SXLqGH5AkrfO\nLPMhSc5cKPPY3u9J/xO010nylSR7jro6YZ23xYVJfmHEfNzYHpXkPknenLGPzsxzQpJ9x+fvzinf\nxb5vknZ0TJL7jc+7pN8hOyD9eHT7UfbrL9e+xnZ7wXrvIzPl+0n6cW6v0Sb2SP9LRe9Lct/ZbTA+\nH5jksJm2eOA6x7jQfq802sW1xjZZqPO/T/LU8fma2XLcfHi2HLMuqtMk/zlTrkfOTLPUMWB2vjU9\n/m5jHew9s62ukOQj26vNrKJ9LXUO8NkkvzqGPT3JP43Pl9h/1jG2G6cfYy8/vv9bkgdn/D7PxH/C\nzDyHjfZ/hSSnJfmlMfzqY585IOOYtML+fVr67+0uSb6c5Hpjuu+uQbkW6ny54+3zk/z1mP5OSY4f\nnw9N8pnRznZPckb6ceGiephSxm2Mf975wyWOw+Pz6Rm/m7Pfp9TD7LYbn++S5A2Ly7nG+8Jqtsu/\nZMv54z1HXeyeOW14qXpZ73+55L7yZ6M9bZdzqyx9PrvUPvqQLLH/Zf75+C7p+8ENxrZ63Uz7PzQr\nn7dcVC/LrHuvsb12S7+++FBWOJ5njX+7Z7+PWB47Pv9BkpeOzy9I8uTx+W4L7XCV22upc97DsvS5\n7c5Jrj4+757k1LEt9s0Svy/j89FZ+prlkdlyvnDFJMcm+ems0XnwRnf5/VJr7fjx+bj0Qt0+yeur\nLko8XnHOvB9Nvxtxx/Qd9G7plfyhMf522ZJBe0X6ideC17fWfjzz/UHpFx/3ba39aAz7bJJXVdWb\n0w92O0KZpjguya1GBvP/0jN++yf5lSSPS/LkmWm/3Fr7+Ph82/Sd4COjnFdI8rFFy543zbeT/G+S\nl1bV29IvQJJ+onhYVb0uyRu3ogzzfC7Jc0bW8q1Jzk/y80neM+LZKcnZM9MvrHNhO2bE+5Squm6S\nN7bWTqmqOyR5U2vte0lSVW9Mr68jc/E6Wm9ntNY+Mj6/Mn17famqnph+cbdb+gnKf2fp9n3XJPee\nyTTvkuT6SU5ah1jvlOSI1tp5SdJa+2ZVLde+39xa+0mSz9eiu62LvKe19s3x+Q5JXj32i69Xv4P5\nS+llX29faq19Lkmq6sQkR7XWWlV9Lr0tHb/czBtsUjuqqqPTLzLflCSttf9NkrEv3Tj9zxDdtbV2\n1ljWvPa1vX25tfbxkd0/urV2bpJUvxt6x2zd8X49PK6q7jc+Xy/9ZO6H2XJcPC7Jr4/P103y2uo9\nqK6Q5EtLLO+lSZ6YXq6HJnnEGL7Sb9xaH3+3xuI6eFAuvq1em+TntnNMUy0+B9gv/WLiA2PY4enn\nA1fLEvvPOrpz+gXrJ8c+eqUk50yc94ZJzm6tfTJJWmvfTi7a1xcst38f1Vr71pjn8+k3R85YdUku\naaXj7T7pF11prb2v+p3ua4x539Ja+0GSH1S/u3/rzD8+r8dv5Lzzh6WOw89ZYVkr1UPSL+gOr96L\nsqVfxK2X1W6XO2acC7TW3lZV54/lbUsb3l6217nV4vPZCzJ/H11q/9s1S5+P3yh9u50ypn9l+oXo\nai217t2TfGDhXK2qXp9px/P1/O2ePd9fOA+9Q5L7JUlr7Z0z7XA1ljrnTZY+t60kf1tVd0xPnOyd\nfiMsWfoac6kyLAy/a5Kb1ZYevNdIP6f4n20oy0U2OonwfzOff5xeSRe01m4xYd4PpR9o90nylvQs\nWsuWE63F2szn7y0ad0L6c7nXzZaTsHumN8p7J/l/VXXT1tqFE+LazGVaUWvtR1V1evrJ5kfTD4i/\nln4itPiAN7vOSr+A+91lFj93mupd7e6c5HfSe1DcqbX26Kq6Tfq2OL6qbtFa+8bUsizWWvufqrpV\nepbu79LvUp/YWrvdnFkWtuWPM/aV1tp/VdUxI6Z3VdXDM9PVbgmLt8t6akt8/7f0u0xnVNWh6T9e\nyRLtO70c92+tfWE7xFpLxLvY7PjZ/WpqfS833XqbjfcnM99/kt6WLszFHyfbJZvH1Ha0XP2ePaa5\nZfqdmWRO+xr7+Pa00EaWi3+2DrbbtqmqA9LvDt6utfb9kajZJcmP2riVkJnjUfqdvOe21o4c8x66\neJmttY+MbpC/mt5bbaEL7lLHgNn51vT4O9WcOjg5PTF1abD4HGDeS++29/GpkhzeWnvyxQb2+p0y\n70rH6+X278V1stbnnlOOt4u1Rf8vHr6U9fiNnNcOtiauBSvVQ5I8I8n7W2v3q94V/ehJUa7OWm6X\nZE4b3iDzfsO3y7nVEuez7878NrLU/rfk+XhV3WKZ5azmvGXeuldjPX+7L3G+v8J6tta8Y+hS57YP\nSO9lcauZa7Jdlpj+x+mJtMXLWlyGx7bW3nWxYC75+OyqbLYXvX07/Y7XQUlS3c3HuO8kudrMtB9M\n7yZ4ysjifDN9Z1rI3H40/YI06Rvkw8us99PpXfWPrP5m0culd7d7f/odnF3Tu5ldasu0lTF/ML1r\n1gfTExuPTu9mttyP2MeT/HJV/WySVH9+b3Fmcclpqj+HdY3W2tvTXxp0izF+v9baMa21v0pyXrbx\nnQKjHr7fWntlekb/Nkn2GHfAU/054ZuusIyfSXJaa+1f0nsa3Cy9nu47ynOV9Mzlh5ZZzHq5/kJZ\nkvxutrSP80YdLzxvNa99vyvJY6suet79lusY61FJfruqrjXWtVu2rn0nl9x/Fvtg+rP2O1XVHuk/\n7J/YpqjXzulJfjFJquoX07uXLbZS+dbLpHY07nScWVX3TZKqumJteWv4BeknU387LgqT+e1ro8p5\nTJJfrf488U7pZV24W/z1qrrx2FfuNzPPesd6jSTnj4vnG6X33lpp+q+Oz4csM93Lk7w6/dGG5Y4B\nF1nr4+9WWKoOrpTkgHGn8vJJDtpOsayFbyU5v8Yzqum9Kj6wwv6zHo5KcmBVXXusb7eq2mfivCen\nP1f9S2Peq9UlX167mt+PH43tud4+mP6bspCkOm/hTm2S+1TVLuO36ID0R0nnWY/fyHnnD/OOw9t6\nDJo9ZjxkG5azFuZtl9nhd09/bCvZtja81r6e/v6Qa1XVFZPcK/2aarucWy1xPnvbrLyPzpp3zn5y\nkp+uLe9nmU0ynJ6Vz1um+ET6b+81R4z338r5t9dv94eT/HbS3x+RLe1wNZY6553nGknOGQmEX0u/\nsbxa70rymNryXqSfG8eZNbHZkghJP3A8rKo+k971euEFJK9J8ufVX0qyX2vt9DF84YVPH06/47/Q\n3eRxSR5aVZ9N/9H+4+VW2lr7cPqF89vSn0F9ZfUuV59O8ry2bW+/3fAy1da9DORD6c+Of6y19vX0\nRw2WvSge3YoekuTVI76Pp3eLmjLN1ZK8dQz7QJI/GbP8Q40Xp6TXyWe2ogxL+YUkn6iq45M8Jf35\nrwOTPHtsm+PTHz1ZzsFJThjLuFGSl7fWPpX+bNMn0g9uL22tfXobY12Nk5IcMupxtyQvTPKS9G5v\nb86Wk6OdsnT7fkZ618bPjjp/xnoF2lo7Mckzk3xg1P1zs5XtO72XzIXVX1LzJ0uMf9OY5jPpz8w9\nsbX2tbUqwzZ6Q5LdRjt6TJbuWrZS+dbL1HaU9O30uDHtR5P81MKIcez4zST/Wv1u5Lz29f4kN6nt\n8GLFWa21s9Mfz3p/ehv5VGvtLWP0k9J7gL0vF3/E6WLH7HUI653pL0j9bHr9rPQo1KHpXeM/lH6h\nP8+r0k+AXj2+zzsGzFrr4+9US9XB2ell/ViS96Y/Zndpckh6fX42PUn+9DF87v6z1lprn0/y1PSX\nzn42vSfenhPn/WH6b9/zx/H6PbnkXb7V/H68eEy/6hcrTnRokv1HuZ+ViyfcPpF+3vfxJM+Yefxq\nKWv+G7nU+UP6o5ZLHYeTXmfvqPFixVX4+yR/V1UfST8ObKRDs/R2eVr6C+c+ld4d+yvJtrXhtdb6\nY8JPT99mb02/+N6e51ZLnc+utI/Oxr/k+fh4rOqR6dcNH05/j8GCKectK2qtfTX9ke1j0o/nn09P\ntk6df3v9dj8tyV1HO7z7WN53psa5KOalznnneVX6fnFs+vXjyatZ5/DS9Pr91Gh7L8oa9gRbeCET\nAJdx1bu4vbW19vMbHAprqPrzkPdprT1oo2MBluc4zI6uqq7aWvvu6InwpvSXmr9po+OaNXqY/Li1\nduHoFfTCiY+mX2Zs9DsRAIB1UlXPT7+Lco+NjgUAkhxaVXdJ7y3x7mz8C42Xcv0krxuPRvwwW15K\nzKAnAgAAADDJZnwnAgAAALAJSSIAAAAAk0giAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJP8fxVf\nOEkCau8kAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a14a537b8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "INFO_common_words = [word[0] for word in INFO_counts.most_common(20)]\n",
    "INFO_common_counts = [word[1] for word in INFO_counts.most_common(20)]\n",
    "\n",
    "fig = plt.figure(figsize=(18,6))\n",
    "sns.barplot(x=INFO_common_words, y=INFO_common_counts)\n",
    "plt.title('Most Common Words used in the research papers for conference INFOCOM')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABBEAAAF1CAYAAAC+pnKAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xm8bmVZP/7PBUcEEUUETUA5RmSa\npRZaKqY/NcsRNUwtFcz0a5ZDamqlRQ6lDZrlPIKziJrzlIE4oqAoIKgEKAgKKDjngPfvj/ve8LDZ\nwzrn7H325vB+v17Paz/PGq97rXtN17rX2tVaCwAAAMBytlvrAAAAAIArBkkEAAAAYBJJBAAAAGAS\nSQQAAABgEkkEAAAAYBJJBAAAAGASSQQArpCqamNVtarasNaxLKSqzqyqOy/S7yVV9bQVnFerql9a\nqeld0VwZyl9V96mqs6rq+1V1i7WOZzFV9cyquqCqvrHWsQCwOiQRgG3euJj7SVXtPq/7CePiY+MW\nTn/ZC5iqul5VvbKqzq2q71XVqVX1D1W185bMe70Y5WtVdd2Zbn+7SLf3r02U60dr7ZGttWdszrhV\ndXRV/elKx8S6969J/qK1dvXW2ufWOpiFVNX1kzwhyU1aa7+w1vFsrtl9elXtWlWvqqpvjH33l6vq\nyTPDVlU9pqpOqqofVNXZVfWWqvq1edM8dEz3VvO671BV/zbG+35VnVFVz9s6JQXYPJIIwJXFGUke\nOPdjnODttDVmXFW7JfnkmN+tW2u7JPndJLsm2XdrxLDaWmvnJjktye/MdP6dJKcu0O2YTZ3+em1t\nsK1bL8t9vcSxkjajTPskOXkz57X95oy3GfZJ8q3W2nmbOuK4GF+P56XPS3L1JDdOcs0k90ryvzP9\nn5/ksUkek2S3JL+c5L+S3H1ugKqqJA9O8u0kB8+b/l8n2T/JrZLskuT/S7Iuk0QAc9bjzhpgNbw2\nyUNmfh+c5DWzA1TVNavqNVV1flV9taqeOndSW1W/VFUfqarvjKa6bx7d5y6IPz/uIt1/gXk/Psn3\nkjyotXZmkrTWzmqtPba19oUxndtU1WfG9D9TVbeZievo0UT4E2Me76qqa1fV66vqu2P4jTPDt6p6\nVFV9Zdw5e0ZV7VtVnxzDH1FVO8wM//CqOq2qvl1V76yqPedN65FjWhdW1QvHCfFCjslIGIyLlluk\nn2DPdrv1GG655X1IVX28qp5XVd9OcmhVbV9V/zqW/+mZOUmfGef0UeYzquqPFwqyqg6rqmfO/L5D\nVZ098/vJVfX1MZ0vVdWdRvftquopVfW/VfWtsRx3mxnvwaMc36qqv11kGV0uhrn5V9UTquq86q1V\nHrrIeM9KcrskLxh14QUzve+82Hqqqj+pqlNGvw9U1T6LTH/uEZGHVdXXkvzP6P7bo/5dVFWfr6o7\nzIyz6HJfar5V9fzqzfO/W1XHV9XtZvodWlVHVtXrquq7SQ4Z6/9vxvL/3hjn+lPKP6+Mc9N+85jO\nZ6vqZjP9nzIzjy9W1X3mlfXjVfWf1bfVU+fqx+h/zbq0xdHXq2+3288bd7ZOL7hfmRfvVavq+0m2\nT9/P/O/ofuPq+4aLqurkqrrXzDiHVdWLq+q9VfWD9AvT+dPdrapeXVXnjGX2XzP9NnmfUP3RnQ8l\n2XPUzcPG8EvVnaOr6llV9fEkP0zyixOW4ceq7wcuHPXtrhPLdI/qrc8uGvH8+kL1YwG3TPKG1tqF\nrbWft9ZOba0dOaa5X5I/T/LA1tr/tNZ+3Fr7YWvt9a21Z89M43ZJ9kxPNjygZva/Y/pvb62d07oz\nW2uXOTYBrDutNR8fH59t+pPkzCR3TvKl9LtJ2yc5K/2uWUuycQz3miTvSL8btDHJl5M8bPR7Y5K/\nTU++7pjkgJnptyS/tMT8P5XkH5bov1uSC9PvVG1IbzFxYZJrj/5Hp9/l3zf9TtgXR2x3HsO/Jsmr\n58XzziTXSPKrSX6c5MNJfnFm/IPHsHdMckGS30hy1ST/meSYedN6d3qriRskOT/J7y9SjoOTfH58\n3z89WbDfvG4/SrLDhOV9SJKfJXn0KONOSR6Z3rLh+mOZHTXi25Bk5yTfTXKjMf71kvzqInEeluSZ\nM7/vkOTs8f1Go27sOX5vTLLv+P64sS73HsvqpUneOPrdJMn30xMmV03y3BH/nZeLYcz/Z0menuQq\nSe6WfkF1rUXGPTrJn87rtuh6SnLv9Ppz47GsnprkE4tMe+OY1mvGMt0pyV5JvjXi2i69Fc23kuyx\n1HJfbr5JHpTk2qPfE5J8I8mOo9+hSX46prHdiOOvkpw41lEluVku3UY2pZ7OTfugsbyfmN5S6Sqj\n//3SL/i2S3L/JD9Icr159fIvx7j3T/KdJLuN/v816sXOSa6T5NNJ/t8SdXrR/coCcV+ynxnzPi3J\n3yTZIX07/t7MejhsxHXbuWkvML33JHlzkmuN6d1+S/cJmdmWxu9F685MXf5a+n5qw4hjuWX40yQP\nT9+P/1mSc5LUMmX6jSTnJfmtMd7B6ceFq05Y1q9IbwHy0CT7zRvukUm+OuEY9MokR4yYvpXkvjP9\nnjqWwaOS/NpcWXx8fHzW82fNA/Dx8fFZ7U8uTSI8Nck/Jfn99DtmG8bJ4sZxYvnj9Gd558b7f0mO\nHt9fk+RlSfZeYPrLJRG+kuSRS/R/cJJPz+v2ySSHjO9HJ/nbmX7/luR9M7/vmeSEefHcdub38Ume\nPG/8fx/fX5nkn2f6XX2cpG+cmdZswuSIJE9ZpBwbk1w8TuD/MsmzRvevz3Q7anRbbnkfkuRr86b/\nP7PLMcldctkkwkVJ/iDJTsvUh8OyeBLhl9IvNu6ccVE5M9wpSe408/t6Y1ltSPJ3Sd4002/nJD/J\n9CTCj5JsmOl/XpLfXmTco7NwEmHB9ZTkfRnJmfF7u/QkxT6LrMOW5Bdnuj05yWvnDfeB9AuxRZf7\npsx39L8wyc3G90Mzc+E6un0pyYGLjLsp9fTQJJ+aF9e5SW63yPAnzM131MtLLlpHt0+nb8PXTa/T\nO830e2AurfOH5PJ1etH9yiJlnLuwvV160mW7mf5vTHLoTP16zRLTul6Sn2eBRFW2YJ+QyycRFq07\nM3X56TP9pizD02b6XW3E8wvLlOnFSZ6xQH26/YRlvVN6sub4sRxOS3LX0e9vZ+vSItO6Wnqi7d7j\n90uTvGOm//bprRk+Psp+ztzy8fHx8VmvH48zAFcmr03yR+knovObi+6efkfvqzPdvpp+Jy1JnpR+\n9/PTo+nwn2zCfL+VfoK7mD3nzXf+vJPkmzPff7TA76vPG3/q8JeZd2vt+yPe2XnPvmX9hwvMa27c\nM5OcneSA9DvyHx29PjnTbe7xj+WWd9JbBMzac1632bh/kH5X+JFJzq2q91TVrywU51Jaa6eltzg4\nNMl5VfWmmabc+yR5+2gOfVF6UuHi9Aufy8Q24vnWJsz6W621n838XnQ5L2Gx9bRPkufPxP3t9Lq8\nVxY3u5z3SXK/ufHHNA5Ivzu/1HJfcr7VH984ZTTlvyi9lczsy0/nr//r57LPok8t/5Lla639PL3e\n7jnieshMs/eLktx0Xlxfb621md9fHePuk36n+dyZcV+afjd9sTJt7n5lzyRnjdhn41hq+5l1/STf\nbq1duMi0V2SfkCXqziJxTlmGl8y7tfbD8fXqy5RpnyRPmBfH9UdZl9Ra+1Fr7R9ba7+Z3nLmiCRv\nqf4o03L79iS5T3oLlPeO369Pcteq2mNM/+LW2gtba7dNb93xrCSvqqobLxcbwFqRRACuNFprX01v\ntny3JG+b1/uC9LtM+8x0u0H6XfS01r7RWnt4a23P9DvmL6rp/1Luv5PcpxZ/adg58+Z7mXmvssvM\nu/p/i7j2Fsz7o+nJglsn+cS8bgfk0iTCkst7mL1QS/rd4tln4G8w27O19oHW2u+mn9SfmuTli8T4\ng/S7g3Mu8xb51tobWmsH5NLHXZ4zep2Vfgdy15nPjq21r8+Praqulr4cV8P85bKcs9Kbg8/GvVNr\n7RNLjDM7j7PS7ybPjr9zG898L7HcF51v9fcfPDnJH6bfOd41vfn97HsM5pfzrKzci0hn19V26Y+o\nnFP9nQ0vT/IX6Y9K7JrkpHlx7VV1mfct3CB9Ozor/U7y7jPlvUZr7VcXK9MW7FfOSXL9efuU5baf\nWWcl2a2qdl1k2iu1T1iy7iwQ55RluNS8FivTWekto2bjuFpr7Y2bUpjW2neT/GN6C5wbpj8mtndV\n7b/EaAenJzm+Vv3fXr4lPVHywPkDjoTFC9Nb5dxkU2ID2JokEYArm4clueO4g3qJ1trF6XeYnlVV\nu4yLiccneV2SVNX9qmrvMfiF6Se+F4/f30x/38Binpv+foLDx3RTVXtV1XPHy73em+SXq+qPqmpD\n9Zcz3iT9uePV9oYkD62qm1fVVdNPkI8drQo2xzHpL7A8Z5xwJ8nHRrdrprdKWHZ5L+KIJI+pqr2r\n6lpJnjLXo6quW1X3Ghc8P05/P8HFi0znhCR3Gy9h+4X0lgdz07lRVd1xLIv/S2+1MTedl4x459bh\nHlV14Oh3ZJJ7VNUB46VpT8/qHWOXq2/zvSTJX1fVryaXvPzvfpsw/uuS3LOqfq/6yw13rP4yyL2X\nWe5LzXeX9Luz5yfZUFV/l76NLOUVSZ5RVftV9+tVtbmJmt+sqvtW/w8Jjxuxfyr94rCNuFL9BZc3\nnTfuddLr4VVGeW6c5L2t/4eSDyb5t6q6RvUXce5bVbdfLIhl9itLOTY9GfakEccd0h9retOUwo9Y\n35eetLjWmMbcf1FZyX3ConVnibg2aRlOLNPLkzyyqn5r1J2dq+ruVbXLctOtqqdV1S2r/yvGHdNf\njnhRki+11r6S5EVJ3jjKtcMo4wOqv6BzryR3SnKPJDcfn5ulJyYPHtN/3Bh3p7H/Pzh9+/AfGoB1\nSxIBuFJprf1va+24RXo/Ov3E/PT0C983JHnV6HfLJMdWf0v6O5M8trV2xuh3aHqC4KKq+sMF5vnt\nJLdJv/N+bFV9L/0O1nfSn+/9VvpJ5hPSm8c+Kck9WmsXbGl5l9Na+3CSpyV5a/rd9H2TPGALJvmR\n9Iusj810OyH9ueLjZ5ofJ0sv74W8PP156s8n+Wwu25pku/Tld056s/nbp7+obCGvHdM4M/2CZfaN\n+FdN8uz0lhLfGGX5m9Hv+enr/oNjHX4q/UVtaa2dnP5c8xvSl+OF6U3kV8PzkxxU/e3z/7HcwK21\nt6dftLyp+n86OCnJXZce6zLjn5XkwPTlcH76Xd2/Sl/miy73Zeb7gfQLvi+nN53/vyzd/D7pybgj\n0tfZd9Of3d/cf9P6jvTHMOZeaHrf1tpPW2tfTH9nyCfTkzW/lv6s+qxj018YekF60/ODxjac9GTZ\nDukvL70wPbm0VHP3pfYri2qt/ST9Xw3edcTxoiQPaa2duty4Mx6cvk86Nf0dHI8b016xfcIydWcx\nm7oMZy1WpuPSX8b4gjHN09Ifa5tUjCSvTl/O56S/HPLu4zGPpP9rxxckeWF6cuF/0x9heNeI54TW\n2gdHq5NvtNa+keQ/kvx6Vd00PVH5b+n7mwvS9yN/0Fo7fWJ8AFvd3NtsAQC2eVV1aPpL8x60GeMe\nkv5SywNWOi4AuKLQEgEAAACYRBIBAAAAmMTjDAAAAMAkWiIAAAAAk0giAAAAAJNs2Joz23333dvG\njRu35iwBAACAZRx//PEXtNb2WG64rZpE2LhxY447brF/zw4AAACshar66pThPM4AAAAATCKJAAAA\nAEwiiQAAAABMIokAAAAATCKJAAAAAEwiiQAAAABMIokAAAAATCKJAAAAAEwiiQAAAABMIokAAAAA\nTCKJAAAAAEwiiQAAAABMIokAAAAATLJhrQM4/8WvW+sQlrTHnz1orUMAAACAdUFLBAAAAGASSQQA\nAABgEkkEAAAAYBJJBAAAAGASSQQAAABgEkkEAAAAYBJJBAAAAGASSQQAAABgEkkEAAAAYBJJBAAA\nAGASSQQAAABgEkkEAAAAYBJJBAAAAGASSQQAAABgEkkEAAAAYBJJBAAAAGASSQQAAABgEkkEAAAA\nYBJJBAAAAGASSQQAAABgEkkEAAAAYBJJBAAAAGASSQQAAABgEkkEAAAAYBJJBAAAAGASSQQAAABg\nEkkEAAAAYJJJSYSq+suqOrmqTqqqN1bVjlV1w6o6tqq+UlVvrqodVjtYAAAAYO0sm0Soqr2SPCbJ\n/q21mybZPskDkjwnyfNaa/sluTDJw1YzUAAAAGBtTX2cYUOSnapqQ5KrJTk3yR2THDn6H57k3isf\nHgAAALBeLJtEaK19Pcm/JvlaevLgO0mOT3JRa+1nY7Czk+y1WkECAAAAa2/K4wzXSnJgkhsm2TPJ\nzknuusCgbZHxH1FVx1XVceeff/6WxAoAAACsoSmPM9w5yRmttfNbaz9N8rYkt0my63i8IUn2TnLO\nQiO31l7WWtu/tbb/HnvssSJBAwAAAFvflCTC15L8dlVdraoqyZ2SfDHJUUkOGsMcnOQdqxMiAAAA\nsB5MeSfCsekvUPxskhPHOC9L8uQkj6+q05JcO8krVzFOAAAAYI1tWH6QpLX290n+fl7n05PcasUj\nAgAAANalqf/iEQAAALiSk0QAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAm\nkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaR\nRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFE\nAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQA\nAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAA\nAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAm2bDWAWwrvvHiZ651CEv6hT976lqHAAAAwBWclggAAADA\nJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAkkggAAADAJJIIAAAAwCSSCAAAAMAk\nkggAAADAJBvWOgDWl1NfeOBah7CkX/nzd6x1CAAAAFdaWiIAAAAAk0giAAAAAJNIIgAAAACTSCIA\nAAAAk0giAAAAAJNIIgAAAACTTEoiVNWuVXVkVZ1aVadU1a2rareq+lBVfWX8vdZqBwsAAACsnakt\nEZ6f5P2ttV9JcrMkpyR5SpIPt9b2S/Lh8RsAAADYRi2bRKiqayT5nSSvTJLW2k9aaxclOTDJ4WOw\nw5Pce7WCBAAAANbelJYIv5jk/CSvrqrPVdUrqmrnJNdtrZ2bJOPvdRYauaoeUVXHVdVx559//ooF\nDgAAAGxdU5IIG5L8RpIXt9ZukeQH2YRHF1prL2ut7d9a23+PPfbYzDABAACAtTYliXB2krNba8eO\n30emJxW+WVXXS5Lx97zVCREAAABYD5ZNIrTWvpHkrKq60eh0pyRfTPLOJAePbgcneceqRAgAAACs\nCxsmDvfoJK+vqh2SnJ7koekJiCOq6mFJvpbkfqsTIgAAALAeTEoitNZOSLL/Ar3utLLhAAAAAOvV\nlHciAAAAAEx+nAGucI5++d3XOoQl3eHh71nrEAAAADaJlggAAADAJJIIAAAAwCSSCAAAAMAk3okA\n69yRr/79tQ5hSQc99P1rHQIAALCVaIkAAAAATCKJAAAAAEwiiQAAAABMIokAAAAATCKJAAAAAEwi\niQAAAABMIokAAAAATCKJAAAAAEwiiQAAAABMsmGtAwCuHF762t9b6xCW9P8e/IG1DgEAANY9LREA\nAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAA\nAIBJJBEAAACASTasdQAAVySHHvF7ax3Ckg79ww+sdQgAAGzDtEQAAAAAJpFEAAAAACaRRAAAAAAm\nkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaR\nRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFE\nAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQA\nAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpFEAAAAACaRRAAAAAAmkUQAAAAAJpmcRKiq7avqc1X1\n7vH7hlV1bFV9pareXFU7rF6YAAAAwFrblJYIj01yyszv5yR5XmttvyQXJnnYSgYGAAAArC+TkghV\ntXeSuyd5xfhdSe6Y5MgxyOFJ7r0aAQIAAADrw4aJw/17kicl2WX8vnaSi1prPxu/z06y10IjVtUj\nkjwiSW5wgxtsfqQArJi7vuMP1jqEZb3vwLeudQgAAMyzbEuEqrpHkvNaa8fPdl5g0LbQ+K21l7XW\n9m+t7b/HHntsZpgAAADAWpvSEuG2Se5VVXdLsmOSa6S3TNi1qjaM1gh7Jzln9cIEAAAA1tqyLRFa\na3/dWtu7tbYxyQOS/E9r7Y+THJXkoDHYwUnesWpRAgAAAGtuU/47w3xPTvL4qjot/R0Jr1yZkAAA\nAID1aOqLFZMkrbWjkxw9vp+e5FYrHxIAAACwHm1JSwQAAADgSkQSAQAAAJhEEgEAAACYRBIBAAAA\nmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhkw1oHAABb4m5vf+Zah7Ck997nqWsdAgDAitES\nAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIB\nAAAAmEQSAQAAAJhkw1oHAAAkd3/bi9c6hCW9575/ttYhAADrgJYIAAAAwCSSCAAAAMAkHmcAAFbM\nPY58/VqHsKR3H/THk4a715HvWuVItsw7D7rnpOHu89aPrXIkW+7tf3DAWocAwCbQEgEAAACYRBIB\nAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEA\nAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmEQSAQAAAJhEEgEAAACYRBIBAAAAmGTDWgcA\nAADLuf/bTlvrEJb05vv+0qThXvj2b65yJFvmz+9z3bUOAVjntEQAAAAAJpFEAAAAACbxOAMAALBJ\n3vfmC9Y6hCXd9f67r3UIsM3SEgEAAACYRBIBAAAAmEQSAQAAAJjEOxEAAIArpc+94ry1DmFZt/jT\n60wa7tx//voqR7JlrvekvdY6BFaIlggAAADAJJIIAAAAwCQeZwAAAGBd+Oa/H7/WISzpuo/7zUnD\nnfeCD65yJFvmOn9xl80eV0sEAAAAYBJJBAAAAGASSQQAAABgEkkEAAAAYBJJBAAAAGASSQQAAABg\nEkkEAAAAYBJJBAAAAGASSQQAAABgEkkEAAAAYJJlkwhVdf2qOqqqTqmqk6vqsaP7blX1oar6yvh7\nrdUPFwAAAFgrU1oi/CzJE1prN07y20n+vKpukuQpST7cWtsvyYfHbwAAAGAbtWwSobV2bmvts+P7\n95KckmSvJAcmOXwMdniSe69WkAAAAMDa26R3IlTVxiS3SHJskuu21s5NeqIhyXUWGecRVXVcVR13\n/vnnb1m0AAAAwJqZnESoqqsneWuSx7XWvjt1vNbay1pr+7fW9t9jjz02J0YAAABgHZiURKiqq6Qn\nEF7fWnvb6PzNqrre6H+9JOetTogAAADAejDlvzNUklcmOaW19tyZXu9McvD4fnCSd6x8eAAAAMB6\nsWHCMLdN8uAkJ1bVCaPb3yR5dpIjquphSb6W5H6rEyIAAACwHiybRGitfSxJLdL7TisbDgAAALBe\nbdJ/ZwAAAACuvCQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAA\ngEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACA\nSSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJ\nJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkk\nEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQR\nAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEA\nAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJJBEAAACASSQRAAAAgEkkEQAA\nAIBJJBEAAACASSQRAAAAgEkkEQAAAIBJtiiJUFW/X1VfqqrTquopKxUUAAAAsP5sdhKhqrZP8sIk\nd01ykyQPrKqbrFRgAAAAwPqyJS0RbpXktNba6a21nyR5U5IDVyYsAAAAYL3ZkiTCXknOmvl99ugG\nAAAAbIOqtbZ5I1bdL8nvtdb+dPx+cJJbtdYePW+4RyR5xPh5oyRf2vxwJ9k9yQWrPI+tQTnWn22l\nLMqxvijH+rOtlEU51pdtpRzJtlMW5VhflGP92VbKohzT7dNa22O5gTZswQzOTnL9md97Jzln/kCt\ntZcledkWzGeTVNVxrbX9t9b8VotyrD/bSlmUY31RjvVnWymLcqwv20o5km2nLMqxvijH+rOtlEU5\nVt6WPM7wmST7VdUNq2qHJA9I8s6VCQsAAABYbza7JUJr7WdV9RdJPpBk+ySvaq2dvGKRAQAAAOvK\nljzOkNbae5O8d4ViWSlb7dGJVaYc68+2UhblWF+UY/3ZVsqiHOvLtlKOZNspi3KsL8qx/mwrZVGO\nFbbZL1YEAAAArly25J0IAABpxXkYAAAUDUlEQVQAwJXIFSKJUFUbq+qktY5jS1TVoVX1xE0cZ/+q\n+o/Vimm1VdUhVbXnWsfBFVdVfX+tY7gyqKozq2r3FZrWI6vqIeP7ZfYBKzmf1TDifcFmjHevqnrK\nasS0wLx2rapHje97VtWRW2O+q2GxY3tVPb2q7rzMuJt8TF0JS9WRqnrvWD+XrKPNnMe9q+ommx/l\nllmqXlXV0VW1Lt4MPquq7ldVp1TVUeP3G6vqC1X1l8vVpy0916qqv9nccVfL3LFzbGN/NNN9Tc4r\nq+oxY/1cOLevnN2Gt7XzxSn7sPVidj9cVTevqrutdUwrZX55tvS4sVrHnbnpTjz23WtmG5p0rFit\nuLfonQisrtbacUmOW+s4tsAhSU7KAv/688qkqja01n621nHAaht1/SUznQ7JlWAf0Fp7Z7befyfa\nNcmjkryotXZOkoO20ny3mtba3611DFW1fWvt4k0Zp7V2tzHuxox1tJmzv3eSdyf54tQRVvI4cwWt\nVw9L8qjW2lFV9QtJbtNa22fKiCtwrvU3Sf5xC8ZfTRuT/FGSNyRrel75qCR3ba2dsUj/Q7KJx4r1\nfG61HvZhm+nmSfbP+nvf3ea6QpVnSr2Zd76xyceKlXSFaIkwbKiqw0dm+ciqulpV/V1VfaaqTqqq\nl1VVJZdkPL84hn3T6LZzVb1qDP+5qjpwtQOuqr+tqi9V1X8nudHotm9Vvb+qjq+qj1bVr4zu9xvl\n+HxVHTO63aGq3j2+71FVH6qqz1bVS6vqq1W1+8ggnlJVL6+qk6vqg1W10wrFv3NVvWfEdFJV3b+q\n3j7T/3er6m1VtX1VHTaGOXFk/g9K33BfX1UnVNVOVfWbVfWRUfYPVNX1xnSOrqrnVdUxoyy3HNP9\nSlU9cyXKMhPzxqo6dYG6dKdRL04c9eSqVXWrqnrbGO/AqvpRVe1QVTtW1emj+2Lr87Cqem71uyLP\nWckyTCzjSTO/nziykOtiu9gc1f3LTB27/+j+oqq61/j+9qp61fj+sJWuO5sY70PGcv58Vb121IcX\nV9VRVXV6Vd1+LPdTquqwmfEeOMp3UlU9Z3S73Pa1hbH916ivJ1fVIxbo/7SxjXyo+t28uTtFN6+q\nT41yvb2qrjW6H11V/1hVH0ny2Lo0o365fcCYxaOr78dOnNleDh3b5Aert1a4b1X98xjm/VV1lc0p\nV1V9v6qeNdbDp6rquqP7Pavq2FHn/3uu+8y0dqmqM+bmW1XXGHFdZZHt6JK707XAvnyFPTvJvmOZ\nvqUuvYN0yFgG7xqx/0VVPX6U8VNVtdsYbsF91hravuYdv0Z9P2jEe7dRHz9WVf9R45g43GTUv9Or\n6jGbMtMl6svTq+rYJLeufiz6xFiXn66qXcboe45l+JWq+ueZac61tJldR/8y+v1V9f3sF6rqH2bG\nmb+vuE2SeyX5lzH+vjVz97/6cf/M8f2QUQfeleSDS81nieXwnJppNTG2xSfM1KudqupNY3pvTrLT\nzLB3qapPju35LVV19dH9csfTTVk3E2J+0FgfJ1Q/H/r7JAckeclY3h9Mcp3R/3bz6tPl1mld9lxr\nwWPiWNZvm7/eq+rZSXYa83r9CpZx7lzlFWN/8vqqunNVfXzM/1Y17+7iGG7jvEk9O8ntRnx/OVvW\nraWqXpLkF5O8c8Twgnn9N/V88ZLjzVYsw4Ln2rX4cXG2zj27Lj1m/OvotkdVvXXUs89U1W1XON7F\ntuvLnUfNDLNDkqcnuf9YD/cf9ewTY1v4RFXNXctcraqOmNsvVD+ezu2jFtwvbEFZpmwLl9tuFyrP\nmOSCx43qx8uTxudxM90vdy23Ehaabk049o190Qtq4WPFw8cy+PyoX1dbqXgX1Fpb95/0TGpLctvx\n+1VJnphkt5lhXpvknuP7OUmuOr7vOv7+Y5IHzXVL8uUkO69izL+Z5MQkV0tyjSSnjZg/nGS/Mcxv\nJfmf8f3EJHvNi/kOSd49vr8gyV+P778/lsfuY9n8LMnNR78j5sq5AmX4gyQvn/l9zSSnJtlj/H5D\nknuOsn5oZri5+I9Osv/4fpUkn5gZ9/7p/xZ0brjnjO+PHevvekmumuTsJNde5br01CRnJfnl0e01\nSR6X3lLnjNHtX5N8Jsltk9w+yRtH98XW52Hp2cHt12h7OWnm9xOTHLoetovNKMv3Z+rih9L/nex1\nk3xt1JEHJPmXMcynk3xqfH91kt9bo5h/NcmXkuw+fu826sObklSSA5N8N8mvpSdyj0/Plu85yrXH\nqHv/k55lXnD72oL4dht/d0q/83PtJGem70/2T3LC6LdLkq8keeIY/gtJbj++Pz3Jv4/vR6ffFZ+b\n/qEz4xydsQ8Yv89M8ujx/VFJXjEzzsfS9xM3S/LD9LtWSfL2JPfezHK1XHpc+OckTx3fr5Vc8mLh\nP03yb+P7IUleMFOH7j2+P2JmmIW2o9nxLrcvX+H6tTFj+573/ZD048wuow59J8kjR7/nJXnc+L7g\nPmuNtpWNWeD4lb69HJRkx/R98w1H/zfm0mPioenHlKum191vJbnKFm4HLckfju47JDk9yS3H72uk\nb5eHjO7XHPF9Ncn1Z+r33HF5dh98l/Q3alf6Nv/uJL+TBfYV4+9hSQ6aGf/oXHos3T3JmTPr/OyZ\n8RaczzLL4RZJPjLz+4sjtrl69fhceqz+9bG+9h9xHJNxvEjy5CR/N7POLnM8XcE6c+Mk75pb1+mt\nPR4ybxnNX/5z9WmxdXqHmXq14DFxmfX+/VXcNmaPE6/KpceQ/8rMvnaMc1KSjbMxzZZtod9bcVs/\nc9SZQ3LpvvKS+LNp54sv2pqxz1sf8/dVix0X5+rcbunb+NzxZu6Y8YYkB4zvN0hyygrHu9B2fXAW\nPo+6ZHuZXT+z28j4fuckbx3fn5jkpeP7TbPMfmErbAtLbbez5Tk0Cxw3cuk1285Jrp7k5LEMF7yW\nW4H1s9g14ly9WerYd0mZcvljxbVnvj8zl55vHboScc//XJEeZzirtfbx8f11SR6T5IyqelL6Stgt\nfaW/K32jfn1V/Vd65Ur6wfVeM1nbHTM23FWK93ZJ3t5a+2GSVNU7xzxvk+Qt1RtNJL0iJ8nHkxxW\nVUckedsC0zsgyX2SpLX2/qq6cKbfGa21E8b349M3uJVwYpJ/rX5H9N2ttY9W1WuTPKiqXp3k1ukH\n712S/GJV/WeS92TcEZnnRuk7mg+Nsm+f5NyZ/nNNc05McnJr7dwkqX7H//rpG/pKmV+Xnpa+DL88\nuh2e5M9ba/9eVadV1Y2T3CrJc9NPrrZP8tGRXV1sfSbJW9omNoddZethu9hcB6Qnbi5O8s1xF+KW\nST6a5HHVnwn7YpJrjTsWt07fR6yFOyY5srV2QZK01r496se7Wmutqk5M8s3W2olJUlUnp2+z+yQ5\nurV2/uj++vT69owsv31tisdU1X3G9+sn2W+m3wFJ3tFa+9GI4V3j7zXTT34+MoY7PMlbZsZ78ybM\nf27/dnyS+850f19r7adj+Wyf5P2j+4mZtk9bqFw/Sb+Qmpvf747veyd586grOyQ5Y4HpvSLJk9K3\nlYcmefjovtB2NGu5fflqOqq19r0k36uq76QfD5O+DH99wj5rLSx1/PqVJKe3S5tAvzE9oTPnPa21\nHyf5cVWdl35ifPbE+S5UXy5O8tbR7UZJzm2tfSZJWmvfTZKx3D7cWvvO+P3F9G33rCXmdZfx+dz4\nffUxv5tl3r5iYuyzPjQz3mLzWbRFTGvtc1V1nerPo++R5ML0i4s5v5PkP8awX6iqL4zuv53kJkk+\nPpbJDkk+mb7cLnc8TfLvm1G2hdwp/QT8M2O+OyU5b+K4S63TOYsdE5NNX+9b6ox5x4kPzxxDNqYn\nfLdFy50vbsrxZiXN31ftm6WPi0m/YfB/SV5RVe/JpcejO6ffEZ8b7hpVtcvYf2+xRbbrm2fh86gv\nLDGpayY5vKr2S0+yzrUKPCDJ88e8TpqwX9hSy20Le2fx7Xa+hY4bB6Rfs/1gzONt6ddx2+Xy13Ir\nYaFrxFnLHfsWc9PqrXB3Td//f2CF4l3QFSmJ0Bb4/aL0zOVZVXVoeqVJkrunH/juleRpVfWr6Rmr\nP2itfWkrxTsX46ztklzUWrv55QZs7ZFV9VvpsZ9QVfOHqfnjzPjxzPeLM9PccEu01r5cVb+Z5G5J\n/qmqPph+Yv2u9J3iW1p/Hu3CqrpZkt9LP1n4wyR/skD8J7fWbr1MGX4+rzw/z8rX0/nrZSkfTXLX\nJD9N8t/pWb/t0zOGi67P4QdbEOOW+Fku+6jSetsuNseC9b+19vXqzQd/P/1Eebf0+vf9lToYb4bK\nwnVsuTq+4LOdrbUp29e0wKrukH7ycuvW2g+r6uhcWj/mYt8cm1LX58p+cS67bf84SVprP6+qn7aR\nPs+EfcAS5Zqdzuz8/jPJc1tr7xzjHjp/mq21j1dvRnn79BZFc48ILbQdzY53uX15a20lk6BLmV+v\nZuvchiy/z1oLSx2/lquP88eddKxYor7830zid7HteHPmW0n+qbX20nlxPGaJecya3afvOK/f7La3\n4HwmODL97tcvpLeYmm+hGCs9gfHAy3S8/LnLSqskh7fW/nrefI+eOO5yy3vBY+LYpjervm2B5bbn\nxY71V3TLnS+u1bnV/PW/63IjtNZ+VlW3Sk9+PSDJX6TfaNguff/zo9UIdJi/Xe+7GdN4Rnpy+j7V\nH5U5enRfbN+84H5hBSy3LVycxbfbpaY1tx0vdazZlGuGTbHUdDf3XOyw9BaUn6+qQ9JbHq2aK9I7\nEW5QVXM7lAemN31NkgvG3ZW5Z0i2S29idlT6XaTZbMyjqy55b8ItVjneY5Lcp/ozU7ukN/v/YXrr\nifuNGGpcHKSq9m2tHdv6SzUuSL8zMutj6RcPqaq7pDfHXVUjg/nD1trr0pvz/0brL1w6J/0RgMPG\ncLsn2a619tb0u/q/MSbxvfRWCklvzrXH3Dqs/mzxZU6+t6L5dem/k2ysql8a3R6cZC6zfEz6ow2f\nHHeIr52eITx53MVYcH2usW+mPw967erPot4jfVtfD9vF5jom/bm27atqj/SLuE+Pfp9MX0fHpCd9\nnjj+rpUPJ/nDqrp2ktR4Fn2CY5Pcvvozz9un182PLLF9bY5rJrlwXDj9Svpdg1kfS3LP6u/9uHr6\nhXDG3bcLq+p2Y7jZbWQps/uA1bRcuRYa/uvj+8FLDPea9DsAr06WPL5cYsK+fEtt9jJdx/usxZya\n3gpn4/h9/8UH3SRT6sup6e8+uGVyyXsypl40zl9HH0jyJ3XpOwP2qqrrZPF9xfzxz0y/+54s/cLD\nxeaznDelX+AclH7hMeuYJH88pnfT9EcakuRTSW47d9ys/oz0L6cvt8WOpyvhw0kOmitXVe1WVftM\nHHfKOt2cY+JPa8J7W1bBmRnHg6r6jSQ3XGCYrbUP3lLr9XxxKcseF8e2eM3W2nvTz1PmkmwfTE8o\nzA23Gsm3+dv1UudRc+bXl9lj5SEz3WevSW6S/qhBsvh+YbUttt1Orf/HJLn3iHfn9JbfH83C13Ir\nYbnpTj32zS/fLknOHfujP16hWBd1RWqJcEqSg6vqpenP6b44/UL6xPQd6WfGcNsneV315reV5Hmt\ntYuq6hnpzem+MCrZmekXV6uitfbZ6i8hOiH92bm5i5o/TvLiqnpqerOgNyX5fPqLMfYbMX94dLv9\nzCT/Ickbq78Y5CPpTbu+l3knsCvs10ZcP0+/E/9no/vr059Vm3sb6F5JXj1OsJNk7g7BYekvOvpR\nevPyg5L8x1g3G9LXx8mrGP9i5telx6bv+N4yTig+k2TuDfPHpjd1mmsO+oUk583c3Vxsfa6Z0ST8\n6emxn5G+M1oX28UWeHt6Hfp8evb2Sa21b4x+H01yl9baaVX11fTWCGuWRGitnVxVz0pPAFycS5sW\nLzfeuVX110mOSl9H722tvWNc5C20fW2O9yd5ZPWmh19Kr/ezMXymerO6z6fvt45LP1FK+sX2S6q/\nqOf09Cb+yzksl90HrJYly7WAQ9O396+PYRc6+U76vu6Z6YmEZPHtaHachfblK6a19q3qL5Q6KZv3\n2NG622ctprX2o+ovB3t/VV2Qy5/wbq5l60tr7SfjePuf1V8K+qP01gtT4p5dR+9rrf1V9cfiPjnq\nyvfTn99daF9xSPo6eXn1lgoHpSfxj6iqB6e/K2Wx+X5woflkmeb+I45dknx97Ic2zvR+cfr+5wvp\n5zOfHuOcX/1O1xvr0hcnPnW0YHxoFj6ebrHW2hdH3f3g2Cf+NL2F1pRxp6zTzTkmvmwM/9nW2qqf\nuM94a5KHVNUJ6cv5ywsM84UkP6uqz6fvjycdj9bAYVmf54vLWe64uEuSd1TVjunHhLkXIz8myQvH\ndrUh/RzzkSsZ2ALb9YLnUfO296OSPGXUqX9Kf5fQ4VX1+Fx23/Oi0f0L6XXqC0m+s9h+IQvXzZW0\n2HY7vzwLGtdsh+XSY8wrWmufS5JFruW2yBLXiHP9px775h8rnpZ+7v/V9OvjVU0g1qXXQqxnY2O8\neDSNunWSF69Vk9Tqb9b9XGvtlWsx/y0xdpbvbq3ddI1DgXWpqq7eWvv+OCk6JskjWmufXeu41kL1\ntyQf2Fp78FrHcmU1Ux8ryQuTfKW19ry1jgvgyqp6a8mrtNb+r6r2TU+Y/3Jr7SdrHNo244pw7Lsi\ntUS4srtB+p2I7dJfFvbwZYZfFVV1fPrzaE9Yi/kDq+5lo3nijunPHl9ZEwj/mf4+lLutdSxXcg+v\nqoPTX9D1uSSb+rw/ACvrakmOGs3mK8mfSSCsuHV/7NMSAQAAAJjkivRiRQAAAGANSSIAAAAAk0gi\nAAAAAJNIIgAAAACTSCIAAAAAk0giAAAAAJP8/+BNdcKdRNFyAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a25ba7ac8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "IS_common_words = [word[0] for word in IS_counts.most_common(20)]\n",
    "IS_common_counts = [word[1] for word in IS_counts.most_common(20)]\n",
    "\n",
    "fig = plt.figure(figsize=(18,6))\n",
    "sns.barplot(x=IS_common_words, y=IS_common_counts)\n",
    "plt.title('Most Common Words used in the research papers for conference ISCAS')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Conference</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2121</th>\n",
       "      <td>Architectural Issues in Distributed Data Base ...</td>\n",
       "      <td>VLDB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>Compressive sampling of EMG bio-signals.</td>\n",
       "      <td>ISCAS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2479</th>\n",
       "      <td>User-Centered Modeling of Interactive Web Sites.</td>\n",
       "      <td>WWW</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1292</th>\n",
       "      <td>A Decomposition Method for Transmission Schedu...</td>\n",
       "      <td>INFOCOM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1599</th>\n",
       "      <td>ASCENT: Adaptive Self-Configuring sEnsor Netwo...</td>\n",
       "      <td>INFOCOM</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  Title Conference\n",
       "2121  Architectural Issues in Distributed Data Base ...       VLDB\n",
       "56             Compressive sampling of EMG bio-signals.      ISCAS\n",
       "2479   User-Centered Modeling of Interactive Web Sites.        WWW\n",
       "1292  A Decomposition Method for Transmission Schedu...    INFOCOM\n",
       "1599  ASCENT: Adaptive Self-Configuring sEnsor Netwo...    INFOCOM"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.base import TransformerMixin\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.svm import LinearSVC\n",
    "from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS\n",
    "from sklearn.metrics import accuracy_score\n",
    "from nltk.corpus import stopwords\n",
    "import string\n",
    "import re\n",
    "import spacy\n",
    "spacy.load('en')\n",
    "from spacy.lang.en import English\n",
    "parser = English()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "STOPLIST = set(stopwords.words('english') + list(ENGLISH_STOP_WORDS))\n",
    "SYMBOLS = \" \".join(string.punctuation).split(\" \") + [\"-\", \"...\", \"”\", \"”\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "class CleanTextTransformer(TransformerMixin):\n",
    "    def transform(self, X, **transform_params):\n",
    "        return [cleanText(text) for text in X]\n",
    "    def fit(self, X, y=None, **fit_params):\n",
    "        return self\n",
    "    def get_params(self, deep=True):\n",
    "        return {}\n",
    "    \n",
    "def cleanText(text):\n",
    "    text = text.strip().replace(\"\\n\", \" \").replace(\"\\r\", \" \")\n",
    "    text = text.lower()\n",
    "    return text\n",
    "\n",
    "def tokenizeText(sample):\n",
    "    tokens = parser(sample)\n",
    "    lemmas = []\n",
    "    for tok in tokens:\n",
    "        lemmas.append(tok.lemma_.lower().strip() if tok.lemma_ != \"-PRON-\" else tok.lower_)\n",
    "    tokens = lemmas\n",
    "    tokens = [tok for tok in tokens if tok not in STOPLIST]\n",
    "    tokens = [tok for tok in tokens if tok not in SYMBOLS]\n",
    "    return tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy: 0.7463768115942029\n",
      "Top 10 features used to predict: \n",
      "Class 1 best: \n",
      "(-0.9286016613168848, 'database')\n",
      "(-0.8479546379060897, 'chip')\n",
      "(-0.7676015086504997, 'wimax')\n",
      "(-0.693351007368828, 'object')\n",
      "(-0.6728544568458809, 'functional')\n",
      "(-0.6625154615527803, 'multihop')\n",
      "(-0.6410221284923994, 'amplifier')\n",
      "(-0.6396379083504244, 'chaotic')\n",
      "(-0.6175858508425204, 'receiver')\n",
      "(-0.6016681457116699, 'web')\n",
      "Class 2 best: \n",
      "(1.1835969416744359, 'speccast')\n",
      "(1.0752019094716938, 'manets')\n",
      "(0.9490175663889364, 'gossip')\n",
      "(0.8468381854216707, 'node')\n",
      "(0.8433092579925067, 'packet')\n",
      "(0.8370511942836825, 'schedule')\n",
      "(0.8344155723237219, 'multicast')\n",
      "(0.8332219451078251, 'queue')\n",
      "(0.8255453091313409, 'qos')\n",
      "(0.8182440953133643, 'location')\n"
     ]
    }
   ],
   "source": [
    "def printNMostInformative(vectorizer, clf, N):\n",
    "    feature_names = vectorizer.get_feature_names()\n",
    "    coefs_with_fns = sorted(zip(clf.coef_[0], feature_names))\n",
    "    topClass1 = coefs_with_fns[:N]\n",
    "    topClass2 = coefs_with_fns[:-(N + 1):-1]\n",
    "    print(\"Class 1 best: \")\n",
    "    for feat in topClass1:\n",
    "        print(feat)\n",
    "    print(\"Class 2 best: \")\n",
    "    for feat in topClass2:\n",
    "        print(feat)\n",
    "\n",
    "vectorizer = CountVectorizer(tokenizer=tokenizeText, ngram_range=(1,1))\n",
    "clf = LinearSVC()\n",
    "pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer), ('clf', clf)])\n",
    "\n",
    "# data\n",
    "train1 = train['Title'].tolist()\n",
    "labelsTrain1 = train['Conference'].tolist()\n",
    "\n",
    "test1 = test['Title'].tolist()\n",
    "labelsTest1 = test['Conference'].tolist()\n",
    "# train\n",
    "pipe.fit(train1, labelsTrain1)\n",
    "\n",
    "# test\n",
    "preds = pipe.predict(test1)\n",
    "print(\"accuracy:\", accuracy_score(labelsTest1, preds))\n",
    "print(\"Top 10 features used to predict: \")\n",
    "\n",
    "printNMostInformative(vectorizer, clf, 10)\n",
    "\n",
    "pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer)])\n",
    "transform = pipe.fit_transform(train1, labelsTrain1)\n",
    "vocab = vectorizer.get_feature_names()\n",
    "\n",
    "for i in range(len(train1)):\n",
    "    s = \"\"\n",
    "    indexIntoVocab = transform.indices[transform.indptr[i]:transform.indptr[i+1]]\n",
    "    numOccurences = transform.data[transform.indptr[i]:transform.indptr[i+1]]\n",
    "    for idx, num in zip(indexIntoVocab, numOccurences):\n",
    "        s += str((vocab[idx], num))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             precision    recall  f1-score   support\n",
      "\n",
      "       VLDB       0.75      0.77      0.76       159\n",
      "      ISCAS       0.90      0.84      0.87       299\n",
      "   SIGGRAPH       0.67      0.66      0.66       106\n",
      "    INFOCOM       0.62      0.69      0.65       139\n",
      "        WWW       0.62      0.62      0.62       125\n",
      "\n",
      "avg / total       0.75      0.75      0.75       828\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn import metrics\n",
    "print(metrics.classification_report(labelsTest1, preds, \n",
    "                                    target_names=df['Conference'].unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
